diff --git a/data/resources/shaders/dolphinfx/bloom.glsl b/data/resources/shaders/dolphinfx/bloom.glsl deleted file mode 100644 index 1e0b7f28a..000000000 --- a/data/resources/shaders/dolphinfx/bloom.glsl +++ /dev/null @@ -1,238 +0,0 @@ -/*===============================================================================*\ -|######################## [Dolphin FX Suite 2.20] #######################| -|########################## By Asmodean ##########################| -|| || -|| This program is free software; you can redistribute it and/or || -|| modify it under the terms of the GNU General Public License || -|| as published by the Free Software Foundation; either version 2 || -|| of the License, or (at your option) any later version. || -|| || -|| This program is distributed in the hope that it will be useful, || -|| but WITHOUT ANY WARRANTY; without even the implied warranty of || -|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the || -|| GNU General Public License for more details. (C)2015 || -|| || -|#################################################################################| -\*===============================================================================*/ - -// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications. 
- -/* -[configuration] - -[OptionRangeInteger] -GUIName = BloomType -OptionName = A_BLOOM_TYPE -MinValue = 0 -MaxValue = 5 -StepAmount = 1 -DefaultValue = 0 - -[OptionRangeFloat] -GUIName = BloomStrength -OptionName = B_BLOOM_STRENGTH -MinValue = 0.000 -MaxValue = 1.000 -StepAmount = 0.001 -DefaultValue = 0.220 - -[OptionRangeFloat] -GUIName = BlendStrength -OptionName = C_BLEND_STRENGTH -MinValue = 0.000 -MaxValue = 1.200 -StepAmount = 0.010 -DefaultValue = 1.000 - -[OptionRangeFloat] -GUIName = BloomDefocus -OptionName = D_B_DEFOCUS -MinValue = 1.000 -MaxValue = 4.000 -StepAmount = 0.100 -DefaultValue = 2.000 - -[OptionRangeFloat] -GUIName = BloomWidth -OptionName = D_BLOOM_WIDTH -MinValue = 1.000 -MaxValue = 8.000 -StepAmount = 0.100 -DefaultValue = 3.200 - -[OptionRangeFloat] -GUIName = BloomReds -OptionName = E_BLOOM_REDS -MinValue = 0.000 -MaxValue = 0.500 -StepAmount = 0.001 -DefaultValue = 0.020 - -[OptionRangeFloat] -GUIName = BloomGreens -OptionName = F_BLOOM_GREENS -MinValue = 0.000 -MaxValue = 0.500 -StepAmount = 0.001 -DefaultValue = 0.010 - -[OptionRangeFloat] -GUIName = BloomBlues -OptionName = G_BLOOM_BLUES -MinValue = 0.000 -MaxValue = 0.500 -StepAmount = 0.001 -DefaultValue = 0.010 - -[/configuration] -*/ - -//Average relative luminance -CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750); -float AvgLuminance(float3 color) -{ - return sqrt( - (color.x * color.x * lumCoeff.x) + - (color.y * color.y * lumCoeff.y) + - (color.z * color.z * lumCoeff.z)); -} - -float smootherstep(float a, float b, float x) -{ - x = saturate((x - a) / (b - a)); - return x*x*x*(x*(x * 6.0 - 15.0) + 10.0); -} - -float3 BlendAddLight(float3 bloom, float3 blend) -{ - return saturate(bloom + blend); -} - -float3 BlendScreen(float3 bloom, float3 blend) -{ - return (bloom + blend) - (bloom * blend); -} - -float3 BlendAddGlow(float3 bloom, float3 blend) -{ - float glow = smootherstep(0.0, 1.0, AvgLuminance(bloom)); - return lerp(saturate(bloom + blend), - (blend + 
blend) - (blend * blend), glow); -} - -float3 BlendGlow(float3 bloom, float3 blend) -{ - float glow = smootherstep(0.0, 1.0, AvgLuminance(bloom)); - return lerp((bloom + blend) - (bloom * blend), - (blend + blend) - (blend * blend), glow); -} - -float3 BlendLuma(float3 bloom, float3 blend) -{ - float lumavg = smootherstep(0.0, 1.0, AvgLuminance(bloom + blend)); - return lerp((bloom * blend), (1.0 - - ((1.0 - bloom) * (1.0 - blend))), lumavg); -} - -float3 BlendOverlay(float3 bloom, float3 blend) -{ - float3 overlay = step(0.5, bloom); - return lerp((bloom * blend * 2.0), (1.0 - (2.0 * - (1.0 - bloom) * (1.0 - blend))), overlay); -} - -float3 BloomCorrection(float3 color) -{ - float3 bloom = color; - - bloom.r = 2.0 / 3.0 * (1.0 - (bloom.r * bloom.r)); - bloom.g = 2.0 / 3.0 * (1.0 - (bloom.g * bloom.g)); - bloom.b = 2.0 / 3.0 * (1.0 - (bloom.b * bloom.b)); - - bloom.r = saturate(color.r + GetOption(E_BLOOM_REDS) * bloom.r); - bloom.g = saturate(color.g + GetOption(F_BLOOM_GREENS) * bloom.g); - bloom.b = saturate(color.b + GetOption(G_BLOOM_BLUES) * bloom.b); - - color = saturate(bloom); - - return color; -} - -float4 PyramidFilter(float2 texcoord, float2 width) -{ - float4 X = SampleLocation(texcoord + float2(0.5, 0.5) * width); - float4 Y = SampleLocation(texcoord + float2(-0.5, 0.5) * width); - float4 Z = SampleLocation(texcoord + float2(0.5, -0.5) * width); - float4 W = SampleLocation(texcoord + float2(-0.5, -0.5) * width); - - return (X + Y + Z + W) / 4.0; -} - -float3 Blend(float3 bloom, float3 blend) -{ - if (GetOption(A_BLOOM_TYPE) == 0) { return BlendGlow(bloom, blend); } - else if (GetOption(A_BLOOM_TYPE) == 1) { return BlendAddGlow(bloom, blend); } - else if (GetOption(A_BLOOM_TYPE) == 2) { return BlendAddLight(bloom, blend); } - else if (GetOption(A_BLOOM_TYPE) == 3) { return BlendScreen(bloom, blend); } - else if (GetOption(A_BLOOM_TYPE) == 4) { return BlendLuma(bloom, blend); } - else /*if (GetOption(A_BLOOM_TYPE) == 5) */ { return BlendOverlay(bloom, 
blend); } -} - -void main() -{ - float4 color = Sample(); - float2 texcoord = GetCoordinates(); - float2 pixelSize = GetInvResolution(); - - float anflare = 4.0; - - float2 defocus = float2(GetOption(D_B_DEFOCUS), GetOption(D_B_DEFOCUS)); - float4 bloom = PyramidFilter(texcoord, pixelSize * defocus); - - float2 dx = float2(pixelSize.x * GetOption(D_BLOOM_WIDTH), 0.0); - float2 dy = float2(0.0, pixelSize.y * GetOption(D_BLOOM_WIDTH)); - - float2 mdx = mul(dx, 2.0); - float2 mdy = mul(dy, 2.0); - - float4 blend = bloom * 0.22520613262190495; - - blend += 0.002589001911021066 * SampleLocation(texcoord - mdx + mdy); - blend += 0.010778807494659370 * SampleLocation(texcoord - dx + mdy); - blend += 0.024146616900339800 * SampleLocation(texcoord + mdy); - blend += 0.010778807494659370 * SampleLocation(texcoord + dx + mdy); - blend += 0.002589001911021066 * SampleLocation(texcoord + mdx + mdy); - - blend += 0.010778807494659370 * SampleLocation(texcoord - mdx + dy); - blend += 0.044875475183061630 * SampleLocation(texcoord - dx + dy); - blend += 0.100529757860782610 * SampleLocation(texcoord + dy); - blend += 0.044875475183061630 * SampleLocation(texcoord + dx + dy); - blend += 0.010778807494659370 * SampleLocation(texcoord + mdx + dy); - - blend += 0.024146616900339800 * SampleLocation(texcoord - mdx); - blend += 0.100529757860782610 * SampleLocation(texcoord - dx); - blend += 0.100529757860782610 * SampleLocation(texcoord + dx); - blend += 0.024146616900339800 * SampleLocation(texcoord + mdx); - - blend += 0.010778807494659370 * SampleLocation(texcoord - mdx - dy); - blend += 0.044875475183061630 * SampleLocation(texcoord - dx - dy); - blend += 0.100529757860782610 * SampleLocation(texcoord - dy); - blend += 0.044875475183061630 * SampleLocation(texcoord + dx - dy); - blend += 0.010778807494659370 * SampleLocation(texcoord + mdx - dy); - - blend += 0.002589001911021066 * SampleLocation(texcoord - mdx - mdy); - blend += 0.010778807494659370 * SampleLocation(texcoord - dx 
- mdy); - blend += 0.024146616900339800 * SampleLocation(texcoord - mdy); - blend += 0.010778807494659370 * SampleLocation(texcoord + dx - mdy); - blend += 0.002589001911021066 * SampleLocation(texcoord + mdx - mdy); - blend = lerp(color, blend, GetOption(C_BLEND_STRENGTH)); - - bloom.xyz = Blend(bloom.xyz, blend.xyz); - bloom.xyz = BloomCorrection(bloom.xyz); - - color.a = AvgLuminance(color.xyz); - bloom.a = AvgLuminance(bloom.xyz); - bloom.a *= anflare; - - SetOutput(lerp(color, bloom, GetOption(B_BLOOM_STRENGTH))); -} diff --git a/data/resources/shaders/dolphinfx/celshading.glsl b/data/resources/shaders/dolphinfx/celshading.glsl deleted file mode 100644 index b1d80b7af..000000000 --- a/data/resources/shaders/dolphinfx/celshading.glsl +++ /dev/null @@ -1,174 +0,0 @@ -/*===============================================================================*\ -|######################## [Dolphin FX Suite 2.20] #######################| -|########################## By Asmodean ##########################| -|| || -|| This program is free software; you can redistribute it and/or || -|| modify it under the terms of the GNU General Public License || -|| as published by the Free Software Foundation; either version 2 || -|| of the License, or (at your option) any later version. || -|| || -|| This program is distributed in the hope that it will be useful, || -|| but WITHOUT ANY WARRANTY; without even the implied warranty of || -|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the || -|| GNU General Public License for more details. (C)2015 || -|| || -|#################################################################################| -\*===============================================================================*/ - -// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications. 
- -/* -[configuration] - -[OptionRangeFloat] -GUIName = EdgeStrength -OptionName = A_EDGE_STRENGTH -MinValue = 0.00 -MaxValue = 4.00 -StepAmount = 0.01 -DefaultValue = 1.00 - -[OptionRangeFloat] -GUIName = EdgeFilter -OptionName = B_EDGE_FILTER -MinValue = 0.25 -MaxValue = 1.00 -StepAmount = 0.01 -DefaultValue = 0.60 - -[OptionRangeFloat] -GUIName = EdgeThickness -OptionName = C_EDGE_THICKNESS -MinValue = 0.25 -MaxValue = 2.00 -StepAmount = 0.01 -DefaultValue = 1.00 - -[OptionRangeInteger] -GUIName = PaletteType -OptionName = D_PALETTE_TYPE -MinValue = 0 -MaxValue = 2 -StepAmount = 1 -DefaultValue = 1 - -[OptionRangeInteger] -GUIName = UseYuvLuma -OptionName = E_YUV_LUMA -MinValue = 0 -MaxValue = 1 -StepAmount = 1 -DefaultValue = 0 - -[OptionRangeInteger] -GUIName = ColourRounding -OptionName = G_COLOR_ROUNDING -MinValue = 0 -MaxValue = 1 -StepAmount = 1 -DefaultValue = 1 - -[/configuration] -*/ - -//Average relative luminance -CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750); -float AvgLuminance(float3 color) -{ - return sqrt( - (color.x * color.x * lumCoeff.x) + - (color.y * color.y * lumCoeff.y) + - (color.z * color.z * lumCoeff.z)); -} - -float3 YUVtoRGB(float3 YUV) -{ - const float3x3 m = float3x3( - 1.000, 0.000, 1.28033, - 1.000,-0.21482,-0.38059, - 1.000, 2.12798, 0.000 ); - - return mul(m, YUV); -} - -float3 RGBtoYUV(float3 RGB) -{ - const float3x3 m = float3x3( - 0.2126, 0.7152, 0.0722, - -0.09991,-0.33609, 0.436, - 0.615, -0.55861, -0.05639 ); - - return mul(m, RGB); -} - -void main() -{ - float4 color = Sample(); - float2 texcoord = GetCoordinates(); - float2 pixelSize = GetInvResolution(); - float2 texSize = GetResolution(); - - float3 yuv; - float3 sum = color.rgb; - - const int NUM = 9; - const float2 RoundingOffset = float2(0.25, 0.25); - const float3 thresholds = float3(9.0, 8.0, 6.0); - - float lum[NUM]; - float3 col[NUM]; - float2 set[NUM] = BEGIN_ARRAY(float2, NUM) - float2(-0.0078125, -0.0078125), - float2(0.00, -0.0078125), - 
float2(0.0078125, -0.0078125), - float2(-0.0078125, 0.00), - float2(0.00, 0.00), - float2(0.0078125, 0.00), - float2(-0.0078125, 0.0078125), - float2(0.00, 0.0078125), - float2(0.0078125, 0.0078125) END_ARRAY; - - for (int i = 0; i < NUM; i++) - { - col[i] = SampleLocation(texcoord + set[i] * RoundingOffset).rgb; - - if (GetOption(G_COLOR_ROUNDING) == 1) { - col[i].r = round(col[i].r * thresholds.r) / thresholds.r; - col[i].g = round(col[i].g * thresholds.g) / thresholds.g; - col[i].b = round(col[i].b * thresholds.b) / thresholds.b; } - - lum[i] = AvgLuminance(col[i].xyz); - yuv = RGBtoYUV(col[i]); - - if (GetOption(E_YUV_LUMA) == 0) - { yuv.r = round(yuv.r * thresholds.r) / thresholds.r; } - else - { yuv.r = saturate(round(yuv.r * lum[i]) / thresholds.r + lum[i]); } - - yuv = YUVtoRGB(yuv); - sum += yuv; - } - - float3 shadedColor = (sum / NUM); - float2 pixel = float2((1.0/texSize.x) * GetOption(C_EDGE_THICKNESS), - (1.0/texSize.y) * GetOption(C_EDGE_THICKNESS)); - - float edgeX = dot(SampleLocation(texcoord + pixel).rgb, lumCoeff); - edgeX = dot(float4(SampleLocation(texcoord - pixel).rgb, edgeX), float4(lumCoeff, -1.0)); - - float edgeY = dot(SampleLocation(texcoord + float2(pixel.x, -pixel.y)).rgb, lumCoeff); - edgeY = dot(float4(SampleLocation(texcoord + float2(-pixel.x, pixel.y)).rgb, edgeY), float4(lumCoeff, -1.0)); - - float edge = dot(float2(edgeX, edgeY), float2(edgeX, edgeY)); - - if (GetOption(D_PALETTE_TYPE) == 0) - { color.rgb = lerp(color.rgb, color.rgb + pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH), GetOption(A_EDGE_STRENGTH)); } - else if (GetOption(D_PALETTE_TYPE) == 1) - { color.rgb = lerp(color.rgb + pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH), shadedColor, 0.25); } - else if (GetOption(D_PALETTE_TYPE) == 2) - { color.rgb = lerp(shadedColor + edge * -GetOption(A_EDGE_STRENGTH), pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH) + color.rgb, 0.50); } - - color.a = AvgLuminance(color.rgb); 
- - SetOutput(saturate(color)); -} diff --git a/data/resources/shaders/dolphinfx/crt/CRT-EASYMODE.glsl b/data/resources/shaders/dolphinfx/crt/CRT-EASYMODE.glsl deleted file mode 100644 index 76159328f..000000000 --- a/data/resources/shaders/dolphinfx/crt/CRT-EASYMODE.glsl +++ /dev/null @@ -1,277 +0,0 @@ -// CRT Shader by EasyMode -// License: GPL - -// A flat CRT shader ideally for 1080p or higher displays. - -// Recommended Settings: - -// Video -// - Aspect Ratio: 4:3 -// - Integer Scale: Off - -// Shader -// - Filter: Nearest -// - Scale: Don't Care - -// Example RGB Mask Parameter Settings: - -// Aperture Grille (Default) -// - Dot Width: 1 -// - Dot Height: 1 -// - Stagger: 0 - -// Lottes' Shadow Mask -// - Dot Width: 2 -// - Dot Height: 1 -// - Stagger: 3 - - -/* -[configuration] - -[OptionRangeFloat] -GUIName = Sharpness Horizontal -OptionName = SHARPNESS_H -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.5 - -[OptionRangeFloat] -GUIName = Sharpness Vertical -OptionName = SHARPNESS_V -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Mask Strength -OptionName = MASK_STRENGTH -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.01 -DefaultValue = 0.3 - -[OptionRangeFloat] -GUIName = Mask Dot Width -OptionName = MASK_DOT_WIDTH -MinValue = 1.0 -MaxValue = 100.0 -StepAmount = 1.0 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Mask Dot Height -OptionName = MASK_DOT_HEIGHT -MinValue = 1.0 -MaxValue = 100.0 -StepAmount = 1.0 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Mask Stagger -OptionName = MASK_STAGGER -MinValue = 0.0 -MaxValue = 100.0 -StepAmount = 1.0 -DefaultValue = 0.0 - -[OptionRangeFloat] -GUIName = Mask Size -OptionName = MASK_SIZE -MinValue = 1.0 -MaxValue = 100.0 -StepAmount = 1.0 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Scanline Strength -OptionName = SCANLINE_STRENGTH -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 1.0 - 
-[OptionRangeFloat] -GUIName = Scanline Beam Width Min. -OptionName = SCANLINE_BEAM_WIDTH_MIN -MinValue = 0.5 -MaxValue = 5.0 -StepAmount = 0.5 -DefaultValue = 1.5 - -[OptionRangeFloat] -GUIName = Scanline Beam Width Max. -OptionName = SCANLINE_BEAM_WIDTH_MAX -MinValue = 0.5 -MaxValue = 5.0 -StepAmount = 0.5 -DefaultValue = 1.5 - -[OptionRangeFloat] -GUIName = Scanline Brightness Min. -OptionName = SCANLINE_BRIGHT_MIN -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.35 - -[OptionRangeFloat] -GUIName = Scanline Brightness Max. -OptionName = SCANLINE_BRIGHT_MAX -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.65 - -[OptionRangeFloat] -GUIName = Scanline Cutoff -OptionName = SCANLINE_CUTOFF -MinValue = 1.0 -MaxValue = 1000.0 -StepAmount = 1.0 -DefaultValue = 400.0 - -[OptionRangeFloat] -GUIName = Gamma Input -OptionName = GAMMA_INPUT -MinValue = 0.1 -MaxValue = 5.0 -StepAmount = 0.1 -DefaultValue = 2.0 - -[OptionRangeFloat] -GUIName = Gamma Output -OptionName = GAMMA_OUTPUT -MinValue = 0.1 -MaxValue = 5.0 -StepAmount = 0.1 -DefaultValue = 1.8 - -[OptionRangeFloat] -GUIName = Brightness Boost -OptionName = BRIGHT_BOOST -MinValue = 1.0 -MaxValue = 2.0 -StepAmount = 0.01 -DefaultValue = 1.2 - -[OptionRangeFloat] -GUIName = Dilation -OptionName = DILATION -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 1.0 -DefaultValue = 1.0 - -[/configuration] -*/ - -#define FIX(c) max(abs(c), 1e-5) -#define PI 3.141592653589 - -#define TEX2D(c) dilate(SampleLocation(c)) - -// Set to 0 to use linear filter and gain speed -#define ENABLE_LANCZOS 1 - -vec4 dilate(vec4 col) -{ - vec4 x = mix(vec4(1.0), col, GetOption(DILATION)); - - return col * x; -} - -float curve_distance(float x, float sharp) -{ - -/* - apply half-circle s-curve to distance for sharper (more pixelated) interpolation - single line formula for Graph Toy: - 0.5 - sqrt(0.25 - (x - step(0.5, x)) * (x - step(0.5, x))) * sign(0.5 - x) -*/ - - float x_step = step(0.5, x); - float curve = 
0.5 - sqrt(0.25 - (x - x_step) * (x - x_step)) * sign(0.5 - x); - - return mix(x, curve, sharp); -} - -mat4x4 get_color_matrix(vec2 co, vec2 dx) -{ - return mat4x4(TEX2D(co - dx), TEX2D(co), TEX2D(co + dx), TEX2D(co + 2.0 * dx)); -} - -vec3 filter_lanczos(vec4 coeffs, mat4x4 color_matrix) -{ - vec4 col = color_matrix * coeffs; - vec4 sample_min = min(color_matrix[1], color_matrix[2]); - vec4 sample_max = max(color_matrix[1], color_matrix[2]); - - col = clamp(col, sample_min, sample_max); - - return col.rgb; -} - -void main() -{ - vec2 vTexCoord = GetCoordinates(); - vec2 nativeSize = 1.0 / GetInvNativePixelSize(); - vec4 SourceSize = vec4(nativeSize, 1.0/nativeSize); - - vec2 dx = vec2(SourceSize.z, 0.0); - vec2 dy = vec2(0.0, SourceSize.w); - vec2 pix_co = vTexCoord * SourceSize.xy - vec2(0.5, 0.5); - vec2 tex_co = (floor(pix_co) + vec2(0.5, 0.5)) * SourceSize.zw; - vec2 dist = fract(pix_co); - float curve_x; - vec3 col, col2; - -#if ENABLE_LANCZOS - curve_x = curve_distance(dist.x, GetOption(SHARPNESS_H) * GetOption(SHARPNESS_H)); - - vec4 coeffs = PI * vec4(1.0 + curve_x, curve_x, 1.0 - curve_x, 2.0 - curve_x); - - coeffs = FIX(coeffs); - coeffs = 2.0 * sin(coeffs) * sin(coeffs * 0.5) / (coeffs * coeffs); - coeffs /= dot(coeffs, vec4(1.0)); - - col = filter_lanczos(coeffs, get_color_matrix(tex_co, dx)); - col2 = filter_lanczos(coeffs, get_color_matrix(tex_co + dy, dx)); -#else - curve_x = curve_distance(dist.x, GetOption(SHARPNESS_H)); - - col = mix(TEX2D(tex_co).rgb, TEX2D(tex_co + dx).rgb, curve_x); - col2 = mix(TEX2D(tex_co + dy).rgb, TEX2D(tex_co + dx + dy).rgb, curve_x); -#endif - - col = mix(col, col2, curve_distance(dist.y, GetOption(SHARPNESS_V))); - col = pow(col, vec3(GetOption(GAMMA_INPUT) / (GetOption(DILATION) + 1.0))); - - float luma = dot(vec3(0.2126, 0.7152, 0.0722), col); - float bright = (max(col.r, max(col.g, col.b)) + luma) * 0.5; - float scan_bright = clamp(bright, GetOption(SCANLINE_BRIGHT_MIN), GetOption(SCANLINE_BRIGHT_MAX)); - float 
scan_beam = clamp(bright * GetOption(SCANLINE_BEAM_WIDTH_MAX), GetOption(SCANLINE_BEAM_WIDTH_MIN), GetOption(SCANLINE_BEAM_WIDTH_MAX)); - float scan_weight = 1.0 - pow(cos(vTexCoord.y * 2.0 * PI * SourceSize.y) * 0.5 + 0.5, scan_beam) * GetOption(SCANLINE_STRENGTH); - - float mask = 1.0 - GetOption(MASK_STRENGTH); - vec2 mod_fac = floor(vTexCoord * GetWindowSize().xy * SourceSize.xy / (SourceSize.xy * vec2(GetOption(MASK_SIZE), GetOption(MASK_DOT_HEIGHT) * GetOption(MASK_SIZE)))); - int dot_no = int(mod((mod_fac.x + mod(mod_fac.y, 2.0) * GetOption(MASK_STAGGER)) / GetOption(MASK_DOT_WIDTH), 3.0)); - vec3 mask_weight; - - if (dot_no == 0) mask_weight = vec3(1.0, mask, mask); - else if (dot_no == 1) mask_weight = vec3(mask, 1.0, mask); - else mask_weight = vec3(mask, mask, 1.0); - - if (SourceSize.y >= GetOption(SCANLINE_CUTOFF)) - scan_weight = 1.0; - - col2 = col.rgb; - col *= vec3(scan_weight); - col = mix(col, col2, scan_bright); - col *= mask_weight; - col = pow(col, vec3(1.0 / GetOption(GAMMA_OUTPUT))); - - SetOutput(vec4(col * GetOption(BRIGHT_BOOST), 1.0)); -} diff --git a/data/resources/shaders/dolphinfx/crt/ZFAST-CRT-COMPOSITE.glsl b/data/resources/shaders/dolphinfx/crt/ZFAST-CRT-COMPOSITE.glsl deleted file mode 100644 index 0f5beb338..000000000 --- a/data/resources/shaders/dolphinfx/crt/ZFAST-CRT-COMPOSITE.glsl +++ /dev/null @@ -1,180 +0,0 @@ -// zfast_crt - A very simple CRT shader. - -// Copyright (C) 2017 Greg Hogan (SoltanGris42) -// edited by metallic 77. -// ported to slang by gregoricavichioli & hunterk. -// ported to dolphinfx by Hyllian. - -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or (at your option) -// any later version. 
- - -/* -[configuration] - -[OptionRangeFloat] -GUIName = Curvature -OptionName = Curvature -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 1.0 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Convergence X-Axis -OptionName = blurx -MinValue = -1.0 -MaxValue = 2.0 -StepAmount = 0.05 -DefaultValue = 0.85 - -[OptionRangeFloat] -GUIName = Convergence Y-Axis -OptionName = blury -MinValue = -1.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = -0.10 - -[OptionRangeFloat] -GUIName = Scanline Amount (Low) -OptionName = HIGHSCANAMOUNT1 -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.4 - -[OptionRangeFloat] -GUIName = Scanline Amount (High) -OptionName = HIGHSCANAMOUNT2 -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.3 - -[OptionRangeFloat] -GUIName = Mask Type -OptionName = TYPE -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 1.0 -DefaultValue = 0.0 - -[OptionRangeFloat] -GUIName = Mask Effect Amount -OptionName = MASK_DARK -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.3 - -[OptionRangeFloat] -GUIName = Mask/Scanline Fade -OptionName = MASK_FADE -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.05 -DefaultValue = 0.7 - -[OptionRangeFloat] -GUIName = Saturation -OptionName = sat -MinValue = 0.0 -MaxValue = 3.0 -StepAmount = 0.05 -DefaultValue = 1.0 - -[OptionRangeFloat] -GUIName = Flicker -OptionName = FLICK -MinValue = 0.0 -MaxValue = 50.0 -StepAmount = 1.0 -DefaultValue = 10.0 - -[/configuration] -*/ - -#define pi 3.14159 - -#define blur_y GetOption(blury)/(SourceSize.y*2.0) -#define blur_x GetOption(blurx)/(SourceSize.x*2.0) -#define iTimer (float(GetTime())*2.0) -#define flicker GetOption(FLICK)/1000.0 - -// Distortion of scanlines, and end of screen alpha. 
-vec2 Warp(vec2 pos) -{ - pos = pos*2.0-1.0; - pos *= vec2(1.0 + (pos.y*pos.y)*0.03, 1.0 + (pos.x*pos.x)*0.05); - - return pos*0.5 + 0.5; -} - - -void main() -{ - vec2 vTexCoord = GetCoordinates(); - vec2 texSize = 1.0 / GetInvNativePixelSize(); - vec4 SourceSize = vec4(texSize, 1.0 / texSize); - - float maskFade = 0.3333*GetOption(MASK_FADE); - float omega = 2.0*pi*SourceSize.y; - - vec2 pos,corn; - if (GetOption(Curvature) == 1.0) -{ - pos = Warp(vTexCoord.xy); - corn = min(pos,vec2(1.0)-pos); // This is used to mask the rounded - corn.x = 0.00001/corn.x; // corners later on - -} - - else pos = vTexCoord; - float OGL2Pos = pos.y*SourceSize.y; - float cent = floor(OGL2Pos)+0.5; - float ycoord = cent*SourceSize.w; - ycoord = mix(pos.y,ycoord,0.6); - pos = vec2(pos.x,ycoord); - - - vec3 sample1 = sin(iTimer)*flicker + SampleLocation(vec2(pos.x + blur_x, pos.y - blur_y)).rgb; - vec3 sample2 = 0.5*SampleLocation(pos).rgb; - vec3 sample3 = sin(iTimer)*flicker + SampleLocation(vec2(pos.x - blur_x, pos.y + blur_y)).rgb; - - vec3 colour = vec3 (sample1.r*0.5 + sample2.r, - sample1.g*0.25 + sample2.g + sample3.g*0.25, - sample2.b + sample3.b*0.5); - - vec3 interl = colour; - vec3 lumweight=vec3(0.22,0.71,0.07); - float lumsat = dot(colour,lumweight); - - vec3 graycolour = vec3(lumsat); - colour = vec3(mix(graycolour,colour.rgb,sat)); - - float SCANAMOUNT = mix(GetOption(HIGHSCANAMOUNT1),GetOption(HIGHSCANAMOUNT2),max(max(colour.r,colour.g),colour.b)); - - - if (SourceSize.y > 400.0) { - colour ; - } -else { - colour *= SCANAMOUNT * sin(fract(OGL2Pos)*3.14159)+1.0-SCANAMOUNT; - colour *= SCANAMOUNT * sin(fract(1.0-OGL2Pos)*3.14159)+1.0-SCANAMOUNT; - colour *= SCANAMOUNT * sin(fract(1.0+OGL2Pos)*3.14159)+1.0-SCANAMOUNT; - } - - float steps; if (GetOption(TYPE) == 0.0) steps = 0.5; else steps = 0.3333; - float whichmask = fract(vTexCoord.x*GetWindowSize().x*steps); - float mask = 1.0 + float(whichmask < steps) * (-GetOption(MASK_DARK)); - - colour.rgb = mix(mask*colour, 
colour, dot(colour.rgb,vec3(maskFade))); - - if (GetOption(Curvature) == 1.0 && corn.y < corn.x || GetOption(Curvature) == 1.0 && corn.x < 0.00001 ) - colour = vec3(0.0); - - SetOutput(vec4(colour.rgb, 1.0)); -} diff --git a/data/resources/shaders/dolphinfx/interpolation/JINC2.glsl b/data/resources/shaders/dolphinfx/interpolation/JINC2.glsl deleted file mode 100644 index 7f57820bc..000000000 --- a/data/resources/shaders/dolphinfx/interpolation/JINC2.glsl +++ /dev/null @@ -1,144 +0,0 @@ - -// Hyllian's jinc windowed-jinc 2-lobe with anti-ringing Shader - -// Copyright (C) 2011-2024 Hyllian - sergiogdb@gmail.com - -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -/* -[configuration] - -[OptionRangeFloat] -GUIName = Window Sinc Param -OptionName = JINC2_WINDOW_SINC -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.01 -DefaultValue = 0.50 - -[OptionRangeFloat] -GUIName = Sinc Param -OptionName = JINC2_SINC -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.01 -DefaultValue = 0.88 - -[OptionRangeFloat] -GUIName = Anti-ringing Strength -OptionName = JINC2_AR_STRENGTH -MinValue = 0.0 -MaxValue = 1.0 -StepAmount = 0.1 -DefaultValue = 0.5 - -[/configuration] -*/ - -#define halfpi 1.5707963267948966192313216916398 -#define pi 3.1415926535897932384626433832795 -#define wa (JINC2_WINDOW_SINC*pi) -#define wb (JINC2_SINC*pi) - -// Calculates the distance between two points -float d(vec2 pt1, vec2 pt2) -{ - vec2 v = pt2 - pt1; - return sqrt(dot(v,v)); -} - -vec3 min4(vec3 a, vec3 b, vec3 c, vec3 d) -{ - return min(a, min(b, min(c, d))); -} - -vec3 max4(vec3 a, vec3 b, vec3 c, vec3 d) -{ - return max(a, max(b, max(c, d))); -} - -vec4 resampler(vec4 x) -{ - vec4 res; - res.x = (x.x==0.0) ? wa*wb : sin(x.x*wa)*sin(x.x*wb)/(x.x*x.x); - res.y = (x.y==0.0) ? wa*wb : sin(x.y*wa)*sin(x.y*wb)/(x.y*x.y); - res.z = (x.z==0.0) ? wa*wb : sin(x.z*wa)*sin(x.z*wb)/(x.z*x.z); - res.w = (x.w==0.0) ? 
wa*wb : sin(x.w*wa)*sin(x.w*wb)/(x.w*x.w); - return res; -} - -void main() -{ - vec2 SourceSize = 1.0 / GetInvNativePixelSize(); - vec2 invSourceSize = 1.0 / SourceSize; - vec2 vTexCoord = GetCoordinates(); - - vec3 color; - mat4x4 weights; - - vec2 dx = vec2(1.0, 0.0); - vec2 dy = vec2(0.0, 1.0); - - vec2 pc = vTexCoord*SourceSize; - - vec2 tc = (floor(pc-vec2(0.5,0.5))+vec2(0.5,0.5)); - - weights[0] = resampler(vec4(d(pc, tc -dx -dy), d(pc, tc -dy), d(pc, tc +dx -dy), d(pc, tc+2.0*dx -dy))); - weights[1] = resampler(vec4(d(pc, tc -dx ), d(pc, tc ), d(pc, tc +dx ), d(pc, tc+2.0*dx ))); - weights[2] = resampler(vec4(d(pc, tc -dx +dy), d(pc, tc +dy), d(pc, tc +dx +dy), d(pc, tc+2.0*dx +dy))); - weights[3] = resampler(vec4(d(pc, tc -dx+2.0*dy), d(pc, tc +2.0*dy), d(pc, tc +dx+2.0*dy), d(pc, tc+2.0*dx+2.0*dy))); - - dx = dx * invSourceSize; - dy = dy * invSourceSize; - tc = tc * invSourceSize; - - // reading the texels - - vec3 c00 = SampleLocation(tc -dx -dy).xyz; - vec3 c10 = SampleLocation(tc -dy).xyz; - vec3 c20 = SampleLocation(tc +dx -dy).xyz; - vec3 c30 = SampleLocation(tc+2.0*dx -dy).xyz; - vec3 c01 = SampleLocation(tc -dx ).xyz; - vec3 c11 = SampleLocation(tc ).xyz; - vec3 c21 = SampleLocation(tc +dx ).xyz; - vec3 c31 = SampleLocation(tc+2.0*dx ).xyz; - vec3 c02 = SampleLocation(tc -dx +dy).xyz; - vec3 c12 = SampleLocation(tc +dy).xyz; - vec3 c22 = SampleLocation(tc +dx +dy).xyz; - vec3 c32 = SampleLocation(tc+2.0*dx +dy).xyz; - vec3 c03 = SampleLocation(tc -dx+2.0*dy).xyz; - vec3 c13 = SampleLocation(tc +2.0*dy).xyz; - vec3 c23 = SampleLocation(tc +dx+2.0*dy).xyz; - vec3 c33 = SampleLocation(tc+2.0*dx+2.0*dy).xyz; - - // Get min/max samples - vec3 min_sample = min4(c11, c21, c12, c22); - vec3 max_sample = max4(c11, c21, c12, c22); - - color = mat4x3(c00, c10, c20, c30) * weights[0]; - color+= mat4x3(c01, c11, c21, c31) * weights[1]; - color+= mat4x3(c02, c12, c22, c32) * weights[2]; - color+= mat4x3(c03, c13, c23, c33) * weights[3]; - color = 
color/(dot(weights * vec4(1.0), vec4(1.0))); - - // Anti-ringing - vec3 aux = color; - color = clamp(color, min_sample, max_sample); - - color = mix(aux, color, JINC2_AR_STRENGTH); - - // final sum and weight normalization - SetOutput(vec4(color, 1.0)); -} diff --git a/data/resources/shaders/dolphinfx/scanlines.glsl b/data/resources/shaders/dolphinfx/scanlines.glsl deleted file mode 100644 index 58ced020c..000000000 --- a/data/resources/shaders/dolphinfx/scanlines.glsl +++ /dev/null @@ -1,120 +0,0 @@ -/*===============================================================================*\ -|######################## [Dolphin FX Suite 2.20] #######################| -|########################## By Asmodean ##########################| -|| || -|| This program is free software; you can redistribute it and/or || -|| modify it under the terms of the GNU General Public License || -|| as published by the Free Software Foundation; either version 2 || -|| of the License, or (at your option) any later version. || -|| || -|| This program is distributed in the hope that it will be useful, || -|| but WITHOUT ANY WARRANTY; without even the implied warranty of || -|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the || -|| GNU General Public License for more details. (C)2015 || -|| || -|#################################################################################| -\*===============================================================================*/ - -// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications. 
- -/* -[configuration] - -[OptionRangeInteger] -GUIName = ScanlineType -OptionName = A_SCANLINE_TYPE -MinValue = 0 -MaxValue = 2 -StepAmount = 1 -DefaultValue = 0 - -[OptionRangeFloat] -GUIName = ScanlineIntensity -OptionName = B_SCANLINE_INTENSITY -MinValue = 0.15 -MaxValue = 0.30 -StepAmount = 0.01 -DefaultValue = 0.18 - -[OptionRangeFloat] -GUIName = ScanlineThickness -OptionName = B_SCANLINE_THICKNESS -MinValue = 0.20 -MaxValue = 0.80 -StepAmount = 0.01 -DefaultValue = 0.50 - -[OptionRangeFloat] -GUIName = ScanlineBrightness -OptionName = B_SCANLINE_BRIGHTNESS -MinValue = 0.50 -MaxValue = 2.00 -StepAmount = 0.01 -DefaultValue = 1.10 - -[OptionRangeFloat] -GUIName = ScanlineSpacing -OptionName = B_SCANLINE_SPACING -MinValue = 0.10 -MaxValue = 0.99 -StepAmount = 0.01 -DefaultValue = 0.25 - -[/configuration] -*/ - -//Average relative luminance -CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750); -float AvgLuminance(float3 color) -{ - return sqrt( - (color.x * color.x * lumCoeff.x) + - (color.y * color.y * lumCoeff.y) + - (color.z * color.z * lumCoeff.z)); -} - -void main() -{ - float4 color = Sample(); - float4 intensity = float4(0.0, 0.0, 0.0, 0.0); - - if (GetOption(A_SCANLINE_TYPE) == 0) { //X coord scanlines - if (fract(gl_FragCoord.y * GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS)) - { - intensity = float4(0.0, 0.0, 0.0, 0.0); - } - else - { - intensity = smoothstep(0.2, GetOption(B_SCANLINE_BRIGHTNESS), color) + - normalize(float4(color.xyz, AvgLuminance(color.xyz))); - } } - - else if (GetOption(A_SCANLINE_TYPE) == 1) { //Y coord scanlines - if (fract(gl_FragCoord.x * GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS)) - { - intensity = float4(0.0, 0.0, 0.0, 0.0); - } - else - { - intensity = smoothstep(0.2, GetOption(B_SCANLINE_BRIGHTNESS), color) + - normalize(float4(color.xyz, AvgLuminance(color.xyz))); - } } - - else if (GetOption(A_SCANLINE_TYPE) == 2) { //XY coord scanlines - if (fract(gl_FragCoord.x * 
GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS) && - fract(gl_FragCoord.y * GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS)) - { - intensity = float4(0.0, 0.0, 0.0, 0.0); - } - else - { - intensity = smoothstep(0.2, GetOption(B_SCANLINE_BRIGHTNESS), color) + - normalize(float4(color.xyz, AvgLuminance(color.xyz))); - } } - - float level = (4.0-GetCoordinates().x) * GetOption(B_SCANLINE_INTENSITY); - - color = intensity * (0.5 - level) + color * 1.1; - - SetOutput(saturate(color)); -} \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/CRT-Guest-HD.fx b/data/resources/shaders/reshade/Shaders/CRT-Guest-HD.fx deleted file mode 100644 index dad01a946..000000000 --- a/data/resources/shaders/reshade/Shaders/CRT-Guest-HD.fx +++ /dev/null @@ -1,2157 +0,0 @@ -/* - - CRT - Guest - HD (Copyright (C) 2018-2024 guest(r) - guest.r@gmail.com) - - Incorporates many good ideas and suggestions from Dr. Venom. - - I would also like give thanks to many Libretro forums members for continuous feedbacks, suggestions and caring about the shader. - - This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - - This program is distributed in the hopes that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along with this program; if not, - write to the Free Software Foundation, Inc, 59 Temple Place - STE 330, Boston, MA 02111-1307, USA. 
- - Ported to ReShade by DevilSingh with some help from guest(r) - -*/ - -uniform float internal_res < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 0.1; - ui_label = "Internal Resolution"; -> = 1.0; - -uniform float PR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'R'"; -> = 0.32; - -uniform float PG < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'G'"; -> = 0.32; - -uniform float PB < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'B'"; -> = 0.32; - -uniform float AS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.6; - ui_step = 0.01; - ui_label = "Afterglow Strength"; -> = 0.2; - -uniform float sat < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Afterglow Saturation"; -> = 0.5; - -uniform float CS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Display Gamut: sRGB | Modern | DCI | Adobe | Rec. 
2020"; -> = 0.0; - -uniform float CP < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "CRT Profile: EBU | P22 | SMPTE-C | Philips | Trinitron"; -> = 0.0; - -uniform float TNTC < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "LUT Colors: Trinitron 1 | Trinitron 2 | Nec MultiSync | NTSC"; -> = 0.0; - -uniform float LUTLOW < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Fix LUT Dark Range"; -> = 5.0; - -uniform float LUTBR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Fix LUT Brightness"; -> = 1.0; - -uniform float WP < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 5.0; - ui_label = "Color Temperature %"; -> = 0.0; - -uniform float wp_saturation < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Saturation Adjustment"; -> = 1.0; - -uniform float pre_bb < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "Brightness Adjustment"; -> = 1.0; - -uniform float contr < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Contrast Adjustment"; -> = 0.0; - -uniform float sega_fix < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Sega Brightness Fix"; -> = 0.0; - -uniform float BP < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 25.0; - ui_step = 1.0; - ui_label = "Raise Black Level"; -> = 0.0; - -uniform float vigstr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Vignette Strength"; -> = 0.0; - -uniform float vigdef < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "Vignette Size"; -> = 1.0; - -uniform float gamma_i < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Gamma Input"; -> = 1.80; - -uniform float gamma_o < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 
5.0; - ui_step = 0.05; - ui_label = "Gamma Out"; -> = 1.75; - -uniform float interr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 800.0; - ui_step = 25.0; - ui_label = "Interlace Trigger Resolution / VGA Trigger"; -> = 375.0; - -uniform float interm < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Interlace Mode: 0:OFF | 1-3:Normal | 4:Interpolation"; -> = 4.0; - -uniform float iscanb < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Interlacing Scanlines Effect (Interlaced Brightness)"; -> = 0.2; - -uniform float iscans < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Interlacing Scanlines Saturation"; -> = 0.25; - -uniform float vga_mode < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "VGA Single/Double Scan Mode"; -> = 0.0; - -uniform float hiscan < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "High Resolution Scanlines (Prepend A Scaler)"; -> = 0.0; - -uniform float intres < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 6.0; - ui_step = 0.5; - ui_label = "Internal Resolution Y: 0.5 | Y-Dowsample"; -> = 0.0; - -uniform float HSHARPNESS < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 0.05; - ui_label = "Horizontal Filter Range"; -> = 1.0; - -uniform float SIGMA_HOR < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 7.0; - ui_step = 0.025; - ui_label = "Horizontal Blur Sigma"; -> = 0.5; - -uniform float S_SHARPH < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Horizontal Substractive Sharpness"; -> = 1.0; - -uniform float HSHARP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Horizontal Sharpness Definition"; -> = 1.2; - -uniform float HARNG < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.1; - ui_label = "Horizontal Substractive Sharpness Ringing"; -> = 0.2; - -uniform float 
VSHARPNESS < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 0.05; - ui_label = "Vertical Filter Range"; -> = 1.0; - -uniform float SIGMA_VER < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 7.0; - ui_step = 0.025; - ui_label = "Vertical Blur Sigma"; -> = 0.5; - -uniform float S_SHARPV < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Vertical Substractive Sharpness"; -> = 1.0; - -uniform float VSHARP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Vertical Sharpness Definition"; -> = 1.2; - -uniform float VARNG < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.1; - ui_label = "Vertical Substractive Sharpness Ringing"; -> = 0.2; - -uniform float MAXS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.3; - ui_step = 0.01; - ui_label = "Maximum Sharpness"; -> = 0.15; - -uniform float m_glow < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Ordinary Glow | Magic Glow"; -> = 0.0; - -uniform float m_glow_cutoff < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.4; - ui_step = 0.01; - ui_label = "Magic Glow Cutoff"; -> = 0.12; - -uniform float m_glow_low < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 7.0; - ui_step = 0.05; - ui_label = "Magic Glow Low Strength"; -> = 0.35; - -uniform float m_glow_high < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 7.0; - ui_step = 0.1; - ui_label = "Magic Glow High Strength"; -> = 5.0; - -uniform float m_glow_dist < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Magic Glow Distribution"; -> = 1.0; - -uniform float m_glow_mask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Magic Glow Mask Strength"; -> = 1.0; - -uniform float FINE_GAUSS < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "Fine (Magic) Glow Sampling"; -> = 1.0; - -uniform float SIZEH < - ui_type = "drag"; - ui_min = 1.0; - 
ui_max = 50.0; - ui_step = 1.0; - ui_label = "Horizontal Glow Radius"; -> = 6.0; - -uniform float SIGMA_H < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 15.0; - ui_step = 0.05; - ui_label = "Horizontal Glow Sigma"; -> = 1.2; - -uniform float SIZEV < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Vertical Glow Radius"; -> = 6.0; - -uniform float SIGMA_V < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 15.0; - ui_step = 0.05; - ui_label = "Vertical Glow Sigma"; -> = 1.2; - -uniform float FINE_BLOOM < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "Fine Bloom/Halation Sampling"; -> = 1.0; - -uniform float SIZEX < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Horizontal Bloom/Halation Radius"; -> = 3.0; - -uniform float SIGMA_X < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 15.0; - ui_step = 0.025; - ui_label = "Horizontal Bloom/Halation Sigma"; -> = 0.75; - -uniform float SIZEY < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Vertical Bloom/Halation Radius"; -> = 3.0; - -uniform float SIGMA_Y < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 15.0; - ui_step = 0.025; - ui_label = "Vertical Bloom/Halation Sigma"; -> = 0.60; - -uniform float glow < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "(Magic) Glow Strength"; -> = 0.08; - -uniform float bloom < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Bloom Strength"; -> = 0.0; - -uniform float b_mask < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Bloom Mask Strength"; -> = 0.0; - -uniform float mask_bloom < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Mask Bloom"; -> = 0.0; - -uniform float bloom_dist < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "Bloom Distribution"; -> = 0.0; - 
-uniform float halation < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Halation Strength"; -> = 0.0; - -uniform float h_mask < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Halation Mask Strength"; -> = 0.5; - -uniform float gamma_c < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Gamma Correct"; -> = 1.0; - -uniform float brightboost1 < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 10.0; - ui_step = 0.05; - ui_label = "Bright Boost Dark Pixels"; -> = 1.4; - -uniform float brightboost2 < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 3.0; - ui_step = 0.025; - ui_label = "Bright Boost Bright Pixels"; -> = 1.1; - -uniform float clp < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Clip Saturated Color Beams"; -> = 0.0; - -uniform float gsl < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Scanlines Type"; -> = 0.0; - -uniform float scanline1 < - ui_type = "drag"; - ui_min = -20.0; - ui_max = 40.0; - ui_step = 0.5; - ui_label = "Scanlines Beam Shape Center"; -> = 6.0; - -uniform float scanline2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 70.0; - ui_step = 1.0; - ui_label = "Scanlines Beam Shape Edges"; -> = 8.0; - -uniform float beam_min < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 10.0; - ui_step = 0.05; - ui_label = "Scanlines Shape Dark Pixels"; -> = 1.2; - -uniform float beam_max < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 3.5; - ui_step = 0.025; - ui_label = "Scanlines Shape Bright Pixels"; -> = 1.0; - -uniform float tds < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Thinner Dark Scanlines"; -> = 0.0; - -uniform float beam_size < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Increased Bright Scanlines Beam"; -> = 0.6; - -uniform float scans < - ui_type = "drag"; - ui_min = 0.0; - 
ui_max = 6.0; - ui_step = 0.1; - ui_label = "Scanlines Saturation / Mask Falloff"; -> = 0.5; - -uniform float scan_falloff < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Scanlines Falloff"; -> = 1.0; - -uniform float spike < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Scanlines Spike Removal"; -> = 1.0; - -uniform float ssharp < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.3; - ui_step = 0.01; - ui_label = "Smart Sharpen Scanlines"; -> = 0.0; - -uniform float scangamma < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Scanlines Gamma"; -> = 2.4; - -uniform float no_scanlines < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.5; - ui_step = 0.05; - ui_label = "No-Scanlines Mode"; -> = 0.0; - -uniform float IOS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Integer Scaling: Odd:Y | Even:X+Y"; -> = 0.0; - -uniform float csize < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.005; - ui_label = "Corner Size"; -> = 0.0; - -uniform float bsize < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.01; - ui_label = "Border Size"; -> = 0.01; - -uniform float sborder < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Border Intensity"; -> = 0.75; - -uniform float barspeed < - ui_type = "drag"; - ui_min = 5.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Hum Bar Speed"; -> = 50.0; - -uniform float barintensity < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Hum Bar Intensity"; -> = 0.0; - -uniform float bardir < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Hum Bar Direction"; -> = 0.0; - -uniform float warpx < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature X (Default 0.03)"; -> = 0.0; - -uniform float warpy < - ui_type = "drag"; 
- ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature Y (Default 0.04)"; -> = 0.0; - -uniform float c_shape < - ui_type = "drag"; - ui_min = 0.05; - ui_max = 0.6; - ui_step = 0.05; - ui_label = "Curvature Shape"; -> = 0.25; - -uniform float overscanx < - ui_type = "drag"; - ui_min = -200.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Overscan X Original Pixels"; -> = 0.0; - -uniform float overscany < - ui_type = "drag"; - ui_min = -200.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Overscan Y Original Pixels"; -> = 0.0; - -uniform float shadow_msk < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 14.0; - ui_step = 1.0; - ui_label = "CRT Mask: 1:CGWG | 2-5:Lottes | 6-14:Trinitron"; -> = 1.0; - -uniform float maskstr < - ui_type = "drag"; - ui_min = -0.5; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Mask Strength (1, 6-14)"; -> = 0.3; - -uniform float mcut < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Mask 6-14 Low Strength"; -> = 1.1; - -uniform float maskboost < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "CRT Mask Boost"; -> = 1.0; - -uniform float masksize < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "CRT Mask Size"; -> = 1.0; - -uniform float mask_zoom < - ui_type = "drag"; - ui_min = -5.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "CRT Mask Zoom (+ Mask Width)"; -> = 0.0; - -uniform float zoom_mask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "CRT Mask Zoom Sharpen"; -> = 0.0; - -uniform float mshift < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.5; - ui_label = "(Transform to) Shadow Mask"; -> = 0.0; - -uniform float mask_layout < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Mask Layout: RGB or BGR (Check LCD Panel)"; -> = 0.0; - -uniform float mask_drk < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - 
ui_step = 0.05; - ui_label = "Lottes Mask Dark"; -> = 0.5; - -uniform float mask_lgt < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Lottes Mask Bright"; -> = 1.5; - -uniform float mask_gamma < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Mask Gamma"; -> = 2.4; - -uniform float slotmask1 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Slot Mask Strength Bright Pixels"; -> = 0.0; - -uniform float slotmask2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Slot Mask Strength Dark Pixels"; -> = 0.0; - -uniform float slotwidth < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 16.0; - ui_step = 1.0; - ui_label = "Slot Mask Width (0:Auto)"; -> = 0.0; - -uniform float double_slot < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Slot Mask Height: 2x1 or 4x1"; -> = 2.0; - -uniform float slotms < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Slot Mask Thickness"; -> = 1.0; - -uniform float smoothmask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Smooth Masks In Bright Scanlines"; -> = 0.0; - -uniform float smask_mit < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Mitigate Slot Mask Interaction"; -> = 0.0; - -uniform float bmask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Base (Black) Mask Strength"; -> = 0.0; - -uniform float mclip < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Preserve Mask Strength"; -> = 0.0; - -uniform float dctypex < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.75; - ui_step = 0.05; - ui_label = "Deconvergence Type X: 0:Static | Other:Dynamic"; -> = 0.0; - -uniform float dctypey < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.75; - ui_step = 0.05; - ui_label = 
"Deconvergence Type Y: 0:Static | Other:Dynamic"; -> = 0.0; - -uniform float deconrx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'R' Range"; -> = 0.0; - -uniform float decongx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'G' Range"; -> = 0.0; - -uniform float deconbx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'B' Range"; -> = 0.0; - -uniform float deconry < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Vertical Deconvergence 'R' Range"; -> = 0.0; - -uniform float decongy < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Vertical Deconvergence 'G' Range"; -> = 0.0; - -uniform float deconby < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Vertical Deconvergence 'B' Range"; -> = 0.0; - -uniform float decons < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "Deconvergence Strength"; -> = 1.0; - -uniform float addnoised < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.02; - ui_label = "Add Noise"; -> = 0.0; - -uniform float noiseresd < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 10.0; - ui_step = 1.0; - ui_label = "Noise Resolution"; -> = 2.0; - -uniform float noisetype < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Noise Type: Colored | Luma"; -> = 0.0; - -uniform float post_br < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 5.0; - ui_step = 0.01; - ui_label = "Post Brightness"; -> = 1.0; - -#include "ReShade.fxh" - -#define TexSize float2(Resolution_X,Resolution_Y) -#define IptSize float2(800.00000000,600.00000000) -#define OptSize float4(BUFFER_SCREEN_SIZE,1.0/BUFFER_SCREEN_SIZE) -#define OrgSize float4(TexSize,1.0/TexSize) -#define SrcSize 
float4(IptSize,1.0/IptSize) -#define fuxcoord (texcoord*1.00001) -#define scans 1.5*scans -#define internal_res internal_res*(1.0/(1.0+hiscan)) -#define eps 1e-10 -#define fracoord (fuxcoord*OptSize.xy) -#define COMPAT_TEXTURE(c,d) tex2D(c,d) -#define inv_sqr_h 1.0/(2.0*SIGMA_H*SIGMA_H) -#define inv_sqr_v 1.0/(2.0*SIGMA_V*SIGMA_V) -#define inv_sqr_x 1.0/(2.0*SIGMA_X*SIGMA_X) -#define inv_sqr_y 1.0/(2.0*SIGMA_Y*SIGMA_Y) -#define invsigmah 1.0/(2.0*SIGMA_HOR*SIGMA_HOR*internal_res*internal_res) -#define invsigmav 1.0/(2.0*SIGMA_VER*SIGMA_VER*internal_res*internal_res) - -#ifndef Resolution_X -#define Resolution_X 320 -#endif - -#ifndef Resolution_Y -#define Resolution_Y 240 -#endif - -#define CRTHD_S0 ReShade::BackBuffer - -texture CRTHD_T1{Width=Resolution_X;Height=Resolution_Y ;Format=RGBA32F;}; -sampler CRTHD_S1{Texture=CRTHD_T1;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT ;MinFilter=POINT ;MipFilter=POINT ;}; - -texture CRTHD_T2{Width=Resolution_X;Height=Resolution_Y ;Format=RGBA16F;}; -sampler CRTHD_S2{Texture=CRTHD_T2;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT ;MinFilter=POINT ;MipFilter=POINT ;}; - -texture CRTHD_T3{Width=Resolution_X;Height=Resolution_Y ;Format=RGBA16F;}; -sampler CRTHD_S3{Texture=CRTHD_T3;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T4{Width=BUFFER_WIDTH;Height=Resolution_Y ;Format=RGBA16F;}; -sampler CRTHD_S4{Texture=CRTHD_T4;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T5{Width=800.00000000;Height=600.00000000 ;Format=RGBA16F;}; -sampler CRTHD_S5{Texture=CRTHD_T5;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T6{Width=800.00000000;Height=600.00000000 ;Format=RGBA16F;}; -sampler 
CRTHD_S6{Texture=CRTHD_T6;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T7{Width=800.00000000;Height=600.00000000 ;Format=RGBA16F;}; -sampler CRTHD_S7{Texture=CRTHD_T7;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T8{Width=800.00000000;Height=600.00000000 ;Format=RGBA16F;}; -sampler CRTHD_S8{Texture=CRTHD_T8;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_T9{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16F;}; -sampler CRTHD_S9{Texture=CRTHD_T9;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture CRTHD_01{Width=1024;Height=32;}; -sampler CRTHD_L1{Texture=CRTHD_01;}; - -texture CRTHD_02{Width=1024;Height=32;}; -sampler CRTHD_L2{Texture=CRTHD_02;}; - -texture CRTHD_03{Width=1024;Height=32;}; -sampler CRTHD_L3{Texture=CRTHD_03;}; - -texture CRTHD_04{Width=1024;Height=32;}; -sampler CRTHD_L4{Texture=CRTHD_04;}; - -uniform int framecount; - -float3 fix_lut(float3 lut,float3 ref) -{ - float r=length(ref); - float l=length(lut); - float m=max(max(ref.r,ref.g),ref.b); - ref=normalize(lut+0.0000001)*lerp(r,l,pow(m,1.25)); - return lerp(lut,ref,LUTBR); -} - -float vignette(float2 pos) -{ - float2 b=vigdef*float2(1.0,OrgSize.x/OrgSize.y)*0.125; - pos=clamp(pos,0.0,1.0); - pos=abs(2.0*(pos-0.5)); - float2 res=lerp(0.0.xx,1.0.xx,smoothstep(1.0.xx,1.0.xx-b,sqrt(pos))); - res=pow(res,0.70.xx); - return max(lerp(1.0,sqrt(res.x*res.y),vigstr),0.0); -} - -float contrast(float x) -{ - return max(lerp(x,smoothstep(0.0,1.0,x),contr),0.0); -} - -float3 plant(float3 tar,float r) -{ - float t=max(max(tar.r,tar.g),tar.b)+0.00001; - return tar*r/t; -} - -float crthd_h(float x) -{ - return exp(-x*x*invsigmah); -} - -float crthd_v(float x) -{ - return exp(-x*x*invsigmav); -} - -float gauss_h(float x) -{ - return 
exp(-x*x*inv_sqr_h); -} - -float gauss_v(float x) -{ - return exp(-x*x*inv_sqr_v); -} - -float bloom_h(float x) -{ - return exp(-x*x*inv_sqr_x); -} - -float bloom_v(float x) -{ - return exp(-x*x*inv_sqr_y); -} - -float mod(float x,float y) -{ - return x-y* floor(x/y); -} - -float st0(float x) -{ - return exp2(-10.0*x*x); -} - -float st1(float x) -{ - return exp2(- 8.0*x*x); -} - -float3 sw0(float x,float color,float scanline,float3 c) -{ - float3 xe=lerp(1.0.xxx+scans,1.0.xxx,c); - float tmp=lerp(beam_min,beam_max,color); - float ex=x*tmp; - ex=(gsl>-0.5)?ex*ex:lerp(ex*ex,ex*ex*ex,0.4); - return exp2(-scanline*ex*xe); -} - -float3 sw1(float x,float color,float scanline,float3 c) -{ - float3 xe=lerp(1.0.xxx+scans,1.0.xxx,c); - x=lerp(x,beam_min*x,max(x-0.4*color,0.0)); - float tmp=lerp(1.2*beam_min,beam_max,color); - float ex=x*tmp; - return exp2(-scanline*ex*ex*xe); -} - -float3 sw2(float x,float color,float scanline,float3 c) -{ - float3 xe=lerp(1.0.xxx+scans,1.0.xxx,c); - float tmp=lerp((2.5-0.5*color)*beam_min,beam_max,color); - tmp=lerp(beam_max,tmp,pow(x,color+0.3)); - float ex=x*tmp; - return exp2(-scanline*ex*ex*xe); -} - -float2 overscan(float2 pos,float dx,float dy) -{ - pos=pos*2.0-1.0; - pos*=float2(dx,dy); - return pos*0.5+0.5; -} - -float2 warp(float2 pos) -{ - pos=pos*2.0-1.0; - pos=lerp(pos,float2(pos.x*rsqrt(1.0-c_shape*pos.y*pos.y),pos.y*rsqrt(1.0-c_shape*pos.x*pos.x)),float2(warpx,warpy)/c_shape); - return pos*0.5+0.5; -} - -float3 gc(float3 c) -{ - float mc=max(max(c.r,c.g),c.b); - float mg=pow(mc,1.0/gamma_c); - return c*mg/(mc+eps); -} - -float3 v_resample(float2 tex0,float4 size) -{ - float f= frac(size.y*tex0.y); - f=0.5-f; - float2 tex=tex0; - tex.y=floor(size.y*tex.y)*size.w+0.5*size.w; - float3 color=0.0.xxx; - float2 dy=float2(0.0,size.w); - float w=0.0; - float wsum=0.0; - float3 pixel; - float vsharpness=max(VSHARPNESS *internal_res,0.6); - float3 cmax=0.0.xxx; - float3 cmin=1.0.xxx; - float sharp= crthd_v(vsharpness)*S_SHARPV; - float 
maxsharp=MAXS; - float FPR=vsharpness; - float fpx=0.0; - float LOOPSIZE=ceil(2.0*FPR); - float CLPSIZE=round(2.0*LOOPSIZE/3.0); - float n=-LOOPSIZE; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S4,tex+n*dy).rgb; - w=crthd_v(n+f)-sharp; - fpx=abs(n+f-sign(n)*FPR)/FPR; - if(abs(n)<=CLPSIZE){cmax=max(cmax,pixel); cmin=min(cmin,pixel);} - if(w<0.0)w=clamp(w,lerp(-maxsharp,0.0,pow(clamp(fpx,0.0,1.0),VSHARP)),0.0); - color=color+w*pixel; - wsum=wsum+w; - n=n+1.0; - }while(n<=LOOPSIZE); - color=color/wsum; - color=clamp(lerp(clamp(color,cmin,cmax),color,VARNG),0.0,1.0); - return color; -} - -float3 crt_mask(float2 pos,float mx,float mb) -{ - float3 mask=mask_drk; - float3 one=1.0; - if(shadow_msk== 1.0) - { - float mc=1.0-max(maskstr,0.0); - pos.x=frac(pos.x*0.5); - if(pos.x<0.49) - { - mask.r=1.0;mask.g= mc;mask.b=1.0; - }else - { - mask.r= mc;mask.g=1.0;mask.b= mc; - } - }else - if(shadow_msk== 2.0) - { - float lane=mask_lgt; - float odd=0.0; - if(frac(pos.x/6.0)<0.49)odd=1.0; - if(frac((pos.y+odd)/2.0)<0.49)lane=mask_drk; - pos.x=floor(mod(pos.x,3.0)); - if(pos.x<0.5)mask.r=mask_lgt;else - if(pos.x<1.5)mask.g=mask_lgt;else - mask.b= mask_lgt; - mask*=lane; - }else - if(shadow_msk== 3.0) - { - pos.x=floor(mod(pos.x,3.0)); - if(pos.x<0.5)mask.r=mask_lgt;else - if(pos.x<1.5)mask.g=mask_lgt;else - mask.b= mask_lgt; - }else - if(shadow_msk== 4.0) - { - pos.x+=pos.y*3.0; - pos.x=frac(pos.x/6.0); - if(pos.x<0.3)mask.r=mask_lgt;else - if(pos.x<0.6)mask.g=mask_lgt;else - mask.b= mask_lgt; - }else - if(shadow_msk== 5.0) - { - pos.xy=floor(pos.xy*float2(1.0,0.5)); - pos.x+=pos.y*3.0; - pos.x=frac(pos.x/6.0); - if(pos.x<0.3)mask.r=mask_lgt;else - if(pos.x<0.6)mask.g=mask_lgt;else - mask.b= mask_lgt; - }else - if(shadow_msk== 6.0) - { - mask=0.0; - pos.x=frac(pos.x/2.0); - if(pos.x<0.49) - { - mask.r=1.0; - mask.b=1.0; - }else - mask.g=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk== 7.0) - { - mask=0.0; - 
pos.x=floor(mod(pos.x,3.0)); - if(pos.x<0.5)mask.r=1.0;else - if(pos.x<1.5)mask.g=1.0;else - mask.b=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk== 8.0) - { - mask=0.0; - pos.x=frac(pos.x/2.0); - if(pos.x<0.49) - { - mask=0.0.xxx; - }else - mask=1.0.xxx; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk== 9.0) - { - mask=0.0; - pos.x=frac(pos.x/3.0); - if(pos.x<0.3)mask=0.0.xxx;else - if(pos.x<0.6)mask=1.0.xxx;else - mask=1.0.xxx; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk==10.0) - { - mask=0.0; - pos.x=frac(pos.x/3.0); - if(pos.x<0.3)mask =0.0.xxx;else - if(pos.x<0.6)mask.rb=1.0.xx ;else - mask.g=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk==11.0) - { - mask=0.0; - pos.x=frac(pos.x*0.25); - if(pos.x<0.2)mask =0.0.xxx;else - if(pos.x<0.4)mask.r=1.0 ;else - if(pos.x<0.7)mask.g=1.0 ;else - mask.b=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk==12.0) - { - mask=0.0; - pos.x=frac(pos.x*0.25); - if(pos.x<0.2)mask.r =1.0 ;else - if(pos.x<0.4)mask.rg=1.0.xx;else - if(pos.x<0.7)mask.gb=1.0.xx;else - mask.b=1.0;mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - if(shadow_msk==13.0) - { - mask=0.0; - pos.x=floor(mod(pos.x,7.0)); - if(pos.x<0.5)mask =0.0.xxx;else - if(pos.x<2.5)mask.r=1.0 ;else - if(pos.x<4.5)mask.g=1.0 ;else - mask.b=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - }else - { - mask=0.0; - pos.x=floor(mod(pos.x,6.0)); - if(pos.x<0.5)mask =0.0.xxx;else - if(pos.x<1.5)mask.r =1.0 ;else - if(pos.x<2.5)mask.rg =1.0.xx ;else - if(pos.x<3.5)mask.rgb=1.0.xxx;else - if(pos.x<4.5)mask.gb =1.0.xx ;else - mask.b=1.0; - mask=clamp(lerp(lerp(one,mask,mcut),lerp(one,mask,maskstr),mx),0.0,1.0); - } - if(mask_layout>0.5)mask=mask.rbg; - 
float maskmin=min(min(mask.r,mask.g),mask.b); - return (mask-maskmin)*(1.0+(maskboost-1.0)*mb)+maskmin; -} - -float slt_mask(float2 pos,float m,float swidth) -{ - if ((slotmask1+slotmask2)==0.0)return 1.0;else - { - pos.y=floor(pos.y/slotms); - float mlen=swidth*2.0; - float px=floor( mod(pos.x, 0.99999*mlen)); - float py=floor(frac(pos.y/(2.0*double_slot))*2.0*double_slot); - float slot_dark=lerp(1.0-slotmask2,1.0-slotmask1,m); - float slot=1.0; - if(py==0.0&&px=swidth) slot=slot_dark; - return slot; - } -} - -float humbars(float pos) -{ - if (barintensity==0.0)return 1.0;else - { - pos=(barintensity>=0.0)?pos:(1.0-pos); - pos=frac(pos+ mod(float(framecount),barspeed)/(barspeed-1.0)); - pos=(barintensity< 0.0)?pos:(1.0-pos); - return (1.0-barintensity)+barintensity*pos; - } -} - -float corner(float2 pos) -{ - float2 bc= bsize*float2(1.0,OptSize.x/OptSize.y)*0.05; - pos=clamp(pos,0.0,1.0); - pos=abs(2.0*(pos-0.5)); - float csz=lerp(400.0,7.0,pow(4.0*csize,0.10)); - float crn=dot(pow(pos,csz.xx*float2(1.0,OptSize.y/OptSize.x)),1.0.xx); - crn=(csize==0.0)? max(pos.x,pos.y) : pow(crn,1.0/csz); - pos=max(pos,crn); - float2 rs=(bsize==0.0)? 
1.0.xx : lerp(0.0.xx,1.0.xx,smoothstep(1.0.xx,1.0.xx-bc,sqrt(pos))); - rs=pow(rs, sborder.xx); - return sqrt(rs.x*rs.y); -} - -float3 declip(float3 c,float b) -{ - float m=max(max(c.r,c.g),c.b); - if(m>b)c=c*b/m; - return c; -} - -float igc(float mc) -{ - return pow(mc,gamma_c); -} - -float3 noise(float3 v) -{ - if(addnoised<0.0)v.z=-addnoised; else v.z= mod(v.z,6001.0)/1753.0; - v =frac(v)+frac(v*1e4)+frac(v*1e-4); - v+=float3(0.12345,0.6789,0.314159); - v =frac(v*dot(v,v)*123.456); - v =frac(v*dot(v,v)*123.456); - v =frac(v*dot(v,v)*123.456); - v =frac(v*dot(v,v)*123.456); - return v; -} - -void bring_pixel(inout float3 c,inout float3 b,inout float3 g,float2 coord,float2 boord) -{ - float stepx=OptSize.z; - float stepy=OptSize.w; - float2 dx=float2(stepx,0.0); - float2 dy=float2(0.0,stepy); - float posx= 2.0*coord.x-1.0; - float posy= 2.0*coord.y-1.0; - if(dctypex>0.025) - { - posx= sign(posx)*pow(abs(posx),1.05-dctypex); - dx=posx*dx; - } - if(dctypey>0.025) - { - posy= sign(posy)*pow(abs(posy),1.05-dctypey); - dy=posy*dy; - } - float2 rc=deconrx*dx+deconry*dy; - float2 gc=decongx*dx+decongy*dy; - float2 bc=deconbx*dx+deconby*dy; - float r1=COMPAT_TEXTURE(CRTHD_S9,coord+rc).r; - float g1=COMPAT_TEXTURE(CRTHD_S9,coord+gc).g; - float b1=COMPAT_TEXTURE(CRTHD_S9,coord+bc).b; - float ds=decons; - float3 d=float3(r1,g1,b1); - c=clamp(lerp(c,d,ds),0.0,1.0); - r1=COMPAT_TEXTURE(CRTHD_S8,boord+rc).r; - g1=COMPAT_TEXTURE(CRTHD_S8,boord+gc).g; - b1=COMPAT_TEXTURE(CRTHD_S8,boord+bc).b; - d=float3(r1,g1,b1); - b=g=lerp(b,d,min(ds,1.0)); - r1=COMPAT_TEXTURE(CRTHD_S6,boord+rc).r; - g1=COMPAT_TEXTURE(CRTHD_S6,boord+gc).g; - b1=COMPAT_TEXTURE(CRTHD_S6,boord+bc).b; - d=float3(r1,g1,b1); - g=lerp(g,d,min(ds,1.0)); -} - -float4 AfterglowPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float2 dx=float2(OrgSize.z,0.0); - float2 dy=float2(0.0,OrgSize.w); - float w=1.0; - float3 color0=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy ).rgb; - float3 
color1=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy-dx).rgb; - float3 color2=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy+dx).rgb; - float3 color3=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy-dy).rgb; - float3 color4=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy+dy).rgb; - float3 clr=(2.5*color0+color1+color2+color3+color4)/6.5; - float3 a=COMPAT_TEXTURE(CRTHD_S1,texcoord.xy).rgb; - if((color0.r+color0.g+color0.b<5.0/255.0)){w=0.0;} - float3 result=lerp(max(lerp(clr,a,0.49+float3(PR,PG,PB))-1.25/255.0,0.0),clr,w); - return float4(result,w); -} - -float4 PreShaderPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - const float3x3 File0=float3x3(0.412391, 0.212639,0.019331, 0.357584,0.715169, 0.119195, 0.180481,0.072192,0.950532); - const float3x3 File1=float3x3(0.430554, 0.222004,0.020182, 0.341550,0.706655, 0.129553, 0.178352,0.071341,0.939322); - const float3x3 File2=float3x3(0.396686, 0.210299,0.006131, 0.372504,0.713766, 0.115356, 0.181266,0.075936,0.967571); - const float3x3 File3=float3x3(0.393521, 0.212376,0.018739, 0.365258,0.701060, 0.111934, 0.191677,0.086564,0.958385); - const float3x3 File4=float3x3(0.392258, 0.209410,0.016061, 0.351135,0.725680, 0.093636, 0.166603,0.064910,0.850324); - const float3x3 File5=float3x3(0.377923, 0.195679,0.010514, 0.317366,0.722319, 0.097826, 0.207738,0.082002,1.076960); - const float3x3 ToRGB=float3x3(3.240970,-0.969244,0.055630,-1.537383,1.875968,-0.203977,-0.498611,0.041555,1.056972); - const float3x3 ToMDN=float3x3(2.791723,-0.894766,0.041678,-1.173165,1.815586,-0.130886,-0.440973,0.032000,1.002034); - const float3x3 ToDCI=float3x3(2.493497,-0.829489,0.035846,-0.931384,1.762664,-0.076172,-0.402711,0.023625,0.956885); - const float3x3 ToADB=float3x3(2.041588,-0.969244,0.013444,-0.565007,1.875968,-0.118360,-0.344731,0.041555,1.015175); - const float3x3 ToREC=float3x3(1.716651,-0.666684,0.017640,-0.355671,1.616481,-0.042771,-0.253366,0.015769,0.942103); - const float3x3 
D65_to_D55=float3x3(0.4850339153,0.2500956126,0.0227359648,0.3488957224,0.6977914447,0.1162985741,0.1302823568,0.0521129427,0.6861537456); - const float3x3 D65_to_D93=float3x3(0.3412754080,0.1759701322,0.0159972847,0.3646170520,0.7292341040,0.1215390173,0.2369894093,0.0947957637,1.2481442225); - float4 imgColor=COMPAT_TEXTURE(CRTHD_S0,texcoord.xy); - float4 aftrglow=COMPAT_TEXTURE(CRTHD_S1,texcoord.xy); - float w=1.0-aftrglow.w; - float l=length(aftrglow.rgb); - aftrglow.rgb=AS*w*normalize(pow(aftrglow.rgb+0.01,sat))*l; - float bp=w*BP/255.0; - if(sega_fix>0.5)imgColor.rgb=imgColor.rgb*(255.0/239.0); - imgColor.rgb=min(imgColor.rgb,1.0); - float3 color=imgColor.rgb; - if(int(TNTC)==0) - { - color.rgb=imgColor.rgb; - }else - { - float lutlow=LUTLOW/255.0;float invLS=1.0/32.0; - float3 lut_ref=imgColor.rgb+lutlow*(1.0-pow(imgColor.rgb,0.333.xxx)); - float lutb=lut_ref.b*(1.0-0.5*invLS); - lut_ref.rg=lut_ref.rg*(1.0-invLS)+0.5*invLS; - float tile1=ceil(lutb*(32.0-1.0)); - float tile0=max(tile1-1.0,0.0); - float f=frac(lutb*(32.0-1.0));if(f==0.0)f=1.0; - float2 coord0=float2(tile0+lut_ref.r,lut_ref.g)*float2(invLS,1.0); - float2 coord1=float2(tile1+lut_ref.r,lut_ref.g)*float2(invLS,1.0); - float4 color1,color2,res; - if(int(TNTC)==1) - { - color1=COMPAT_TEXTURE(CRTHD_L1,coord0); - color2=COMPAT_TEXTURE(CRTHD_L1,coord1); - res=lerp(color1,color2,f); - }else - if(int(TNTC)==2) - { - color1=COMPAT_TEXTURE(CRTHD_L2,coord0); - color2=COMPAT_TEXTURE(CRTHD_L2,coord1); - res=lerp(color1,color2,f); - }else - if(int(TNTC)==3) - { - color1=COMPAT_TEXTURE(CRTHD_L3,coord0); - color2=COMPAT_TEXTURE(CRTHD_L3,coord1); - res=lerp(color1,color2,f); - }else - if(int(TNTC)==4) - { - color1=COMPAT_TEXTURE(CRTHD_L4,coord0); - color2=COMPAT_TEXTURE(CRTHD_L4,coord1); - res=lerp(color1,color2,f); - } - res.rgb=fix_lut(res.rgb,imgColor.rgb); - color=lerp(imgColor.rgb,res.rgb,min(TNTC,1.0)); - } - float3 c=clamp(color,0.0,1.0); - float3x3 m_o; - float p; - if(CS==0.0){p=2.2;m_o=ToRGB;}else - 
if(CS==1.0){p=2.2;m_o=ToMDN;}else - if(CS==2.0){p=2.6;m_o=ToDCI;}else - if(CS==3.0){p=2.2;m_o=ToADB;}else - if(CS==4.0){p=2.4;m_o=ToREC;} - color=pow(c,p); - float3x3 m_i; - if(CP==0.0){m_i=File0;}else - if(CP==1.0){m_i=File1;}else - if(CP==2.0){m_i=File2;}else - if(CP==3.0){m_i=File3;}else - if(CP==4.0){m_i=File4;}else - if(CP==5.0){m_i=File5;} - color=mul(color,m_i); - color=mul(color,m_o); - color=clamp(color,0.0,1.0); - color=pow(color,1.0/p); - if(CP==-1.0)color=c; - float3 scolor1=plant(pow(color,wp_saturation),max(max(color.r,color.g),color.b)); - float luma=dot(color,float3(0.299,0.587,0.114)); - float3 scolor2=lerp(luma,color,wp_saturation); - color=(wp_saturation>1.0)?scolor1:scolor2; - color=plant(color,contrast(max(max(color.r,color.g),color.b))); - p=2.2; - color=clamp(color,0.0,1.0); - color=pow(color,p); - float3 warmer=mul(color,D65_to_D55); - warmer=mul(warmer,ToRGB); - float3 cooler=mul(color,D65_to_D93); - cooler=mul(cooler,ToRGB); - float m=abs(WP)/100.0; - float3 comp=(WP<0.0)?cooler:warmer; - color=lerp(color,comp,m); - color=pow(max(color,0.0),1.0/p); - if(BP>-0.5)color=color+aftrglow.rgb+bp;else - { - color=max(color+BP/255.0,0.0)/(1.0+BP/255.0*step(-BP/255.0,max(max(color.r,color.g),color.b)))+aftrglow.rgb; - } - color=min(color*pre_bb,1.0); - return float4(color,vignette(texcoord.xy)); -} - -float4 LinearizePS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float3 c1=COMPAT_TEXTURE(CRTHD_S2,fuxcoord).rgb; - float3 c2=COMPAT_TEXTURE(CRTHD_S2,fuxcoord+float2(0.0,OrgSize.w)).rgb; - float3 c=c1; - float intera=1.0; - float gamma_in=clamp(gamma_i,1.0,5.0); - float m1=max(max(c1.r,c1.g),c1.b); - float m2=max(max(c2.r,c2.g),c2.b); - float3 df=abs(c1-c2); - float d=max(max(df.r,df.g),df.b); - if(interm==2.0)d=lerp(0.1*d,10.0*d,step(m1/(m2+0.0001),m2/(m1+0.0001))); - float r=m1; - float yres_div=1.0;if(intres>1.25)yres_div=intres; - bool hscans =(hiscan>0.5); - 
if(interr<=OrgSize.y/yres_div&&interm>0.5&&intres!=1.0&&intres!=0.5&&vga_mode<0.5||hscans) - { - intera=0.25; - float liine_no=clamp(floor( mod(OrgSize.y*fuxcoord.y,2.0)),0.0,1.0); - float frame_no=clamp(floor( mod(float(framecount),2.0)),0.0,1.0); - float ii=abs(liine_no-frame_no); - if(interm< 3.5) - { - c2=plant(lerp(c2,c2*c2,iscans),max(max(c2.r,c2.g),c2.b)); - r=clamp(max(m1*ii,(1.0-iscanb)*min(m1,m2)),0.0,1.0); - c=plant(lerp(lerp(c1,c2,min(lerp(m1,1.0-m2,min(m1,1.0-m1))/(d+0.00001),1.0)),c1,ii),r); - if(interm==3.0)c=(1.0-0.5*iscanb)*lerp(c2,c1,ii); - } - if(interm==4.0){c=plant(lerp(c,c*c,0.5*iscans),max(max(c.r,c.g),c.b))*(1.0-0.5*iscanb); - } - if(hscans)c=c1; - } - if(vga_mode>0.5) - { - c=c1; if(interr<=OrgSize.y)intera=0.75;else intera=0.5; - } - c=pow(c,gamma_in); - if(fuxcoord.x>0.5)gamma_in=intera;else gamma_in=1.0/gamma_in; - return float4(c,gamma_in); -} - -float4 HGaussianPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float4 GaussSize=float4(OrgSize.x,OrgSize.y,OrgSize.z,OrgSize.w)*lerp(1.0.xxxx,float4(FINE_GAUSS,FINE_GAUSS,1.0/FINE_GAUSS,1.0/FINE_GAUSS),min(FINE_GAUSS-1.0,1.0)); - float f=frac(GaussSize.x*texcoord.x); - f=0.5-f; - float2 tex=floor(GaussSize.xy*texcoord)*GaussSize.zw+0.5*GaussSize.zw; - float3 color=0.0; - float2 dx=float2(GaussSize.z ,0.0); - float3 pixel; - float w; - float wsum=0.0; - float n=-SIZEH; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S3,tex+n*dx).rgb; - if(m_glow>0.5) - { - pixel=max(pixel-m_glow_cutoff,0.0); - pixel=plant(pixel,max(max(max(pixel.r,pixel.g),pixel.b)-m_glow_cutoff,0.0)); - } - w=gauss_h(n+f); - color=color+w*pixel; - wsum=wsum+w; - n=n+1.0; - }while(n<=SIZEH); - color=color/wsum; - return float4(color,1.0); -} - -float4 VGaussianPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float4 GaussSize=float4(SrcSize.x,OrgSize.y,SrcSize.z,OrgSize.w)*lerp(1.0.xxxx,float4(FINE_GAUSS,FINE_GAUSS,1.0/FINE_GAUSS,1.0/FINE_GAUSS),min(FINE_GAUSS-1.0,1.0)); - float 
f=frac(GaussSize.y*texcoord.y); - f=0.5-f; - float2 tex=floor(GaussSize.xy*texcoord)*GaussSize.zw+0.5*GaussSize.zw; - float3 color=0.0; - float2 dy=float2(0.0,GaussSize.w ); - float3 pixel; - float w; - float wsum=0.0; - float n=-SIZEV; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S5,tex+n*dy).rgb; - w=gauss_v(n+f); - color=color+w*pixel; - wsum=wsum+w; - n=n+1.0; - }while(n<=SIZEV); - color=color/wsum; - return float4(color,1.0); -} - -float4 BloomHorzPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float4 BloomSize=float4(OrgSize.x,OrgSize.y,OrgSize.z,OrgSize.w)*lerp(1.0.xxxx,float4(FINE_BLOOM,FINE_BLOOM,1.0/FINE_BLOOM,1.0/FINE_BLOOM),min(FINE_BLOOM-1.0,1.0)); - float f=frac(BloomSize.x*texcoord.x); - f=0.5-f; - float2 tex=floor(BloomSize.xy*texcoord)*BloomSize.zw+0.5*BloomSize.zw; - float4 color=0.0; - float2 dx=float2(BloomSize.z ,0.0); - float4 pixel; - float w; - float wsum=0.0; - float n=-SIZEX; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S3,tex+n*dx); - w=bloom_h(n+f); - pixel.a =max(max(pixel.r,pixel.g),pixel.b); - pixel.a*=pixel.a*pixel.a; - color=color+w*pixel; - wsum=wsum+w; - n=n+1.0; - }while(n<=SIZEX); - color=color/wsum; - return float4(color.rgb,pow(color.a,0.333333)); -} - -float4 BloomVertPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float4 BloomSize=float4(SrcSize.x,OrgSize.y,SrcSize.z,OrgSize.w)*lerp(1.0.xxxx,float4(FINE_BLOOM,FINE_BLOOM,1.0/FINE_BLOOM,1.0/FINE_BLOOM),min(FINE_BLOOM-1.0,1.0)); - float f=frac(BloomSize.y*texcoord.y); - f=0.5-f; - float2 tex=floor(BloomSize.xy*texcoord)*BloomSize.zw+0.5*BloomSize.zw; - float4 color=0.0; - float2 dy=float2(0.0,BloomSize.w ); - float4 pixel; - float w; - float wsum=0.0; - float n=-SIZEY; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S7,tex+n*dy); - w=bloom_v(n+f); - pixel.a*=pixel.a*pixel.a; - color=color+w*pixel; - wsum=wsum+w; - n=n+1.0; - }while(n<=SIZEY); - color=color/wsum; - return float4(color.rgb,pow(color.a,0.175000)); -} - -float4 HD_Pass1_PS(float4 
position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float2 prescalex=float2(tex2Dsize(CRTHD_S3,0))/OrgSize.xy; - float4 PALSize=OrgSize*float4(prescalex.x,prescalex.y,1.0/prescalex.x,1.0/prescalex.y); - float f=frac(PALSize.x*fuxcoord.x); - f=0.5-f; - float2 tex=floor(PALSize.xy*fuxcoord)*PALSize.zw+0.5*PALSize.zw; - float3 color=0.0.xxx; - float scolor=0.0; - float2 dx=float2(PALSize.z ,0.0); - float3 pixel; - float w=0.0; - float swsum=0.0; - float wsum=0.0; - float hsharpness=HSHARPNESS*internal_res; - float3 cmax=0.0.xxx; - float3 cmin=1.0.xxx; - float sharp=crthd_h(hsharpness)*S_SHARPH; - float maxsharp=MAXS; - float FPR=hsharpness; - float fpx=0.0; - float sp=0.0; - float sw=0.0; - float ts=0.025; - float3 luma=float3(0.2126,0.7152,0.0722); - float LOOPSIZE=ceil(2.0*FPR); - float CLPSIZE=round(2.0*LOOPSIZE/3.0); - float n=-LOOPSIZE; - do - { - pixel=COMPAT_TEXTURE(CRTHD_S3,tex+n*dx).rgb; - sp=max(max(pixel.r,pixel.g),pixel.b); - w=crthd_h(n+f)-sharp; - fpx=abs(n+f-sign(n)*FPR)/FPR; - if(abs(n)<=CLPSIZE){cmax=max(cmax,pixel); cmin=min(cmin,pixel);} - if(w<0.0)w=clamp(w,lerp(-maxsharp,0.0,pow(clamp(fpx,0.0,1.0),HSHARP)),0.0); - color=color+w*pixel; - wsum=wsum+w; - sw=max(w,0.0)*(dot(pixel,luma)+ts); - scolor=scolor+sw*sp; - swsum=swsum+sw; - n=n+1.0; - }while(n<=LOOPSIZE); - color =color/wsum; - scolor=scolor/swsum; - color =clamp(lerp(clamp(color,cmin,cmax),color,HARNG),0.0,1.0); - scolor=clamp(lerp(max(max(color.r,color.g),color.b),scolor,spike),0.0,1.0); - return float4(color,scolor); -} - -float4 HD_Pass2_PS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float2 prescalex=float2(tex2Dsize(CRTHD_S3,0))/OrgSize.xy; - float4 PALSize=float4(OrgSize.x,OrgSize.y,OrgSize.z,OrgSize.w); - float gamma_in=1.0/COMPAT_TEXTURE(CRTHD_S3,0.25).a; - float intera=COMPAT_TEXTURE(CRTHD_S3,float2(0.75,0.25)).a; - bool hscans=(hiscan>0.5); - bool interb=(((intera<0.35)||(no_scanlines>0.025))&&!hscans); - bool 
vgascan=((abs(intera-0.5)<0.05)&&(no_scanlines==0.0)); - float SourceY=PALSize.y; - float sy=1.0; - if( intres==1.0)sy=max(floor(SourceY/199.0),1.0); - if( intres>0.25&&intres!=1.0)sy=intres; - if(vgascan)sy=0.5; else if(abs(intera-0.75)<0.05)sy=1.0; - PALSize*=float4(1.0,1.0/sy,1.0,sy); - float2 lexcoord = fuxcoord.xy; - if(IOS> 0.0&&!interb) - { - float2 ofactor= OptSize.xy/OrgSize.xy; - float2 intfactor=(IOS<2.5)?floor(ofactor):ceil(ofactor); - float2 diff=ofactor/intfactor; - float scan=diff.y; - lexcoord=overscan(lexcoord,scan,scan); - if(IOS==1.0||IOS==3.0)lexcoord=float2(fuxcoord.x,lexcoord.y); - } - lexcoord=overscan(lexcoord,(OrgSize.x-overscanx)/OrgSize.x,(OrgSize.y-overscany)/OrgSize.y); - float2 pos=warp(lexcoord); - float coffset=0.5; - float2 ps=PALSize.zw; - float OGL2Pos=pos.y*PALSize.y-coffset; - float f=frac(OGL2Pos); - float2 dx=float2(ps.x,0.0); - float2 dy=float2(0.0,ps.y); - float2 pC4; - pC4.y=floor(OGL2Pos)*ps.y+0.5*ps.y; - pC4.x=pos.x; - if((intres==0.5&&prescalex.y<1.5)||vgascan)pC4.y=floor(pC4.y*OrgSize.y)*OrgSize.w+0.5*OrgSize.w; - if( interb&&no_scanlines>0.025)pC4.y=pC4.y+smoothstep(0.40-0.5*no_scanlines,0.60+0.5*no_scanlines,f)*PALSize.w; - float3 color1=COMPAT_TEXTURE(CRTHD_S4,pC4).rgb; - float3 scolor1=COMPAT_TEXTURE(CRTHD_S4,pC4).aaa; - float prescaley=float(tex2Dsize(CRTHD_S3,0).y)/OrgSize.y; - if( interb&&no_scanlines<0.05||hscans&&vgascan||hscans)color1=v_resample(pos,PALSize*float4(1.0,prescaley,1.0,1.0/prescaley)); - color1=pow(color1,scangamma/gamma_in); - pC4+=dy; - if((intres==0.5&&prescalex.y<1.5)||vgascan)pC4.y=floor((pos.y+0.33*dy.y)*OrgSize.y)*OrgSize.w+0.5*OrgSize.w; - float3 color2=COMPAT_TEXTURE(CRTHD_S4,pC4).rgb; - float3 scolor2=COMPAT_TEXTURE(CRTHD_S4,pC4).aaa; - color2=pow(color2,scangamma/gamma_in); - float3 ctmp=color1;float w3=1.0;float3 color=color1; - float3 one=1.0; - if( hscans){color2=color1;scolor2=scolor1;} - if(!interb||hscans) - { - float3 luma=float3(0.2126,0.7152,0.0722); - float 
ssub=ssharp*max(abs(scolor1.x-scolor2.x),abs(dot(color1,luma)-dot(color2,luma))); - float shape1=lerp(scanline1,scanline2+ssub*scolor1.x*35.0, f); - float shape2=lerp(scanline1,scanline2+ssub*scolor2.x*35.0,1.0-f); - float wt1=st0( f); - float wt2=st0(1.0- f); - float3 color0= color1*wt1+ color2*wt2; - float3 scolor0=scolor1*wt1+scolor2*wt2; - ctmp=color0/(wt1+wt2); - float3 sctmp=max(scolor0/(wt1+wt2),ctmp); - float3 w1,w2; - float3 cref1=lerp(sctmp,scolor1,beam_size);float creff1=pow(max(max(cref1.r,cref1.g),cref1.b),scan_falloff); - float3 cref2=lerp(sctmp,scolor2,beam_size);float creff2=pow(max(max(cref2.r,cref2.g),cref2.b),scan_falloff); - if(tds>0.5){shape1=lerp(scanline2,shape1,creff1);shape2=lerp(scanline2,shape2,creff2);} - float f1= f; - float f2=1.0- f; - float m1=max(max(color1.r,color1.g),color1.b)+eps; - float m2=max(max(color2.r,color2.g),color2.b)+eps; - cref1=color1/m1; - cref2=color2/m2; - if(gsl< 0.5) - {w1=sw0(f1,creff1,shape1,cref1);w2=sw0(f2,creff2,shape2,cref2);}else - if(gsl==1.0) - {w1=sw1(f1,creff1,shape1,cref1);w2=sw1(f2,creff2,shape2,cref2);}else - {w1=sw2(f1,creff1,shape1,cref1);w2=sw2(f2,creff2,shape2,cref2);} - float3 w3=w1+w2; - float wf1=max(max(w3.r,w3.g),w3.b); - if(wf1> 1.0) {wf1=1.0/wf1; w1*=wf1, w2*=wf1;} - if(abs(clp)>0.005) - { - sy=m1; one=(clp>0.0)?w1:1.0.xxx; - float sat=1.0001-min(min(cref1.r,cref1.g),cref1.b); - color1=lerp(color1,plant(pow(color1,0.70.xxx-0.325*sat),sy),pow(sat,0.3333)*one*abs(clp)); - sy=m2; one=(clp>0.0)?w2:1.0.xxx; - sat=1.0001-min(min(cref2.r,cref2.g),cref2.b); - color2=lerp(color2,plant(pow(color2,0.70.xxx-0.325*sat),sy),pow(sat,0.3333)*one*abs(clp)); - } - color=(gc(color1)*w1+gc(color2)*w2); - color=min(color,1.0); - } - if( interb) - { - color=gc(color1); - } - float colmx=max(max(ctmp.r,ctmp.g),ctmp.b); - color=pow(color,gamma_in/scangamma); - return float4(color,colmx); -} - -float4 ChromaticPS(float4 position:SV_Position,float2 texcoord:TEXCOORD):SV_Target -{ - float 
gamma_in=1.0/COMPAT_TEXTURE(CRTHD_S3,0.25).a; - float intera=COMPAT_TEXTURE(CRTHD_S3,float2(0.75,0.25)).a; - bool interb=((intera<0.35||no_scanlines>0.025)&&(hiscan<0.5)); - float2 lexcoord = fuxcoord.xy; - if(IOS> 0.0&&!interb) - { - float2 ofactor= OptSize.xy/OrgSize.xy; - float2 intfactor=(IOS<2.5)?floor(ofactor):ceil(ofactor); - float2 diff=ofactor/intfactor; - float scan=diff.y; - lexcoord=overscan(lexcoord,scan,scan); - if(IOS==1.0||IOS==3.0)lexcoord=float2(fuxcoord.x,lexcoord.y); - } - lexcoord=overscan(lexcoord,(OrgSize.x-overscanx)/OrgSize.x,(OrgSize.y-overscany)/OrgSize.y); - float2 pos0=warp(fuxcoord.xy); - float2 pos1=fuxcoord.xy; - float2 pos=warp(lexcoord); - float3 color=COMPAT_TEXTURE(CRTHD_S9,pos1).rgb; - float3 Bloom=COMPAT_TEXTURE(CRTHD_S8,pos).rgb; - float3 Glow=COMPAT_TEXTURE(CRTHD_S6,pos).rgb; - if((abs(deconrx)+abs(deconry)+abs(decongx)+abs(decongy)+abs(deconbx)+abs(deconby))>0.2) - bring_pixel(color,Bloom,Glow,pos1,pos); - float cm=igc(max(max(color.r,color.g),color.b)); - float mx1=COMPAT_TEXTURE(CRTHD_S9,pos1 ).a; - float colmx=max(mx1,cm); - float w3=min((cm+0.0001)/(colmx+0.0005),1.0);if(interb)w3=1.00; - float2 dx=float2(0.001,0.0); - float mx0=COMPAT_TEXTURE(CRTHD_S9,pos1-dx).a; - float mx2=COMPAT_TEXTURE(CRTHD_S9,pos1+dx).a; - float mxg=max(max(mx0,mx1),max(mx2,cm)); - float mx=pow(mxg,1.40/gamma_in); - dx=float2(OrgSize.z,0.0)*0.25; - mx0=COMPAT_TEXTURE(CRTHD_S9,pos1-dx).a; - mx2=COMPAT_TEXTURE(CRTHD_S9,pos1+dx).a; - float mb=(1.0-min(abs(mx0-mx2)/(0.5+mx1),1.0)); - float3 orig1=color; - float3 one=1.0; - float3 cmask=one; - float3 dmask=one; - float3 emask=one; - float mwidths[15]={0.0,2.0,3.0,3.0,6.0,6.0,2.4,3.5,2.4,3.25,3.5,4.5,4.25,7.5,6.25}; - float mwidth=mwidths[int(shadow_msk)]; - float mask_compensate=frac(mwidth); - if(shadow_msk> 0.5) - { - float2 maskcoord=fracoord.xy* 1.00001; - float2 scoord=maskcoord; - mwidth=floor(mwidth)*masksize; - float swidth=mwidth; - bool zoomed=(abs(mask_zoom)>0.75); - float mscale=1.0; - 
float2 maskcoord0=maskcoord; - maskcoord.y=floor(maskcoord.y/masksize); - float mwidth1=max(mwidth+mask_zoom,2.0); - if( mshift> 0.25) - { - float stagg_lvl=1.0; if(frac(mshift)>0.25)stagg_lvl=2.0; - float next_line=float(floor(mod(maskcoord.y,2.0*stagg_lvl))0.025 )mlerp=clamp((1.0+zoom_mask)*mlerp-0.5*zoom_mask,0.0,1.0); - float mcoord=floor(maskcoord.x/mscale); if(shadow_msk==13.0&&mask_zoom==-2.0)mcoord=ceil(maskcoord.x/mscale); - cmask*=lerp(crt_mask(float2(mcoord,maskcoord.y),mx,mb),crt_mask(float2(mcoord+1.0,maskcoord.y),mx,mb),mlerp); - } - if(slotwidth>0.5)swidth=slotwidth;float smask=1.0; - float sm_offset=0.0;bool bsm_offset=(shadow_msk==1.0||shadow_msk==3.0||shadow_msk==6.0||shadow_msk==7.0||shadow_msk==9.0||shadow_msk==12.0); - if( zoomed) - { - if(mask_layout<0.5&&bsm_offset)sm_offset=1.0;else - if(bsm_offset)sm_offset=-1.0; - } - swidth=round(swidth*mscale); - smask=slt_mask(scoord+float2(sm_offset,0.0),mx,swidth); - smask=clamp(smask+lerp(smask_mit,0.0,min(w3,pow(w3*max(max(orig1.r,orig1.g),orig1.b),0.33333))),0.0,1.0); - emask =cmask; - cmask*=smask; - dmask =cmask; - if(abs(mask_bloom)>0.025) - { - float maxbl=max(max(max(Bloom.r,Bloom.g),Bloom.b),mxg); - maxbl=maxbl*max(lerp(1.0,2.0-colmx,bloom_dist),0.0); - if(mask_bloom>0.025)cmask=max(min(cmask+maxbl*mask_bloom,1.0),cmask);else - cmask=max(lerp(cmask,cmask*(1.0-0.5*maxbl)+plant(pow(Bloom,0.35.xxx),maxbl),-mask_bloom),cmask); - } - color=pow(color,mask_gamma/gamma_in); - color=color*cmask; - color=min(color,1.0); - color=pow(color,gamma_in/mask_gamma); - cmask=min(cmask,1.0); - dmask=min(dmask,1.0); - } - float dark_compensate=lerp(max(clamp(lerp(mcut,maskstr,mx),0.0,1.0)-1.0+mask_compensate,0.0)+1.0,1.0,mx); if(shadow_msk< 0.5) dark_compensate=1.0; - float bb=lerp(brightboost1,brightboost2,mx)* dark_compensate; color*=bb; - float3 Ref=COMPAT_TEXTURE(CRTHD_S3,pos).rgb; - float maxb=COMPAT_TEXTURE(CRTHD_S8,pos).a; - float vig=COMPAT_TEXTURE(CRTHD_S2,clamp(pos,0.0+0.5*OrgSize.zw,1.0 
-0.5*OrgSize.zw)).a; - float3 bcmask=lerp(one,cmask,b_mask); - float3 hcmask=lerp(one,cmask,h_mask); - float3 Bloom1=Bloom; - if(abs(bloom)>0.025) - { - if(bloom<-0.01)Bloom1=plant(Bloom,maxb); - Bloom1= min(Bloom1*(orig1+color), max(0.5*(colmx+orig1-color),0.001*Bloom1)); - Bloom1=0.5*(Bloom1+lerp(Bloom1,lerp(colmx*orig1,Bloom1,0.5),1.0-color)); - Bloom1= bcmask*Bloom1*max(lerp(1.0,2.0-colmx,bloom_dist),0.0); - color=pow(pow(color,mask_gamma/gamma_in)+abs(bloom)*pow(Bloom1,mask_gamma/gamma_in),gamma_in/mask_gamma); - } - if(!interb)color=declip(min(color,1.0),lerp(1.0,w3,0.6)); - if(halation> 0.01) - { - Bloom=0.5*(Bloom+Bloom*Bloom); - float mbl=max(max(Bloom.r,Bloom.g),Bloom.b); - float mxh=colmx+colmx*colmx; - Bloom=plant(Bloom,max(1.25*(mbl-0.1375),0.165*mxh*(1.0+w3))); - Bloom=max((2.0*lerp(maxb*maxb,maxb,colmx)-0.5*max(max(Ref.r,Ref.g),Ref.b)),0.25)*Bloom; - Bloom=min((2.5-colmx+0.5*color)*plant(0.375+orig1,lerp(0.5*(1.0+w3),(0.50+w3)/1.5,colmx))*hcmask*Bloom,1.0-color); - color=pow(pow(color,mask_gamma/gamma_in)+halation*pow(Bloom,mask_gamma/gamma_in),gamma_in/mask_gamma); - }else - if(halation<-0.01) - { - float mbl=max(max(Bloom.r,Bloom.g),Bloom.b); - Bloom=plant(Bloom+Ref+orig1+Bloom*Bloom*Bloom,min(mbl*mbl,0.75)); - color=color+2.0*lerp(1.0,w3,0.5*colmx)*hcmask*Bloom*(-halation); - } - float w=0.25+0.60*lerp(w3,1.0,sqrt(colmx)); - if(smoothmask>0.5) - { - color=min(color,1.0); color=max(min(color/w3,1.0)*w3, min(orig1*bb,color*(1.0-w3))); - } - if(m_glow<0.5)Glow=lerp(Glow,0.25*color,colmx);else - { - float3 orig2=plant(orig1+0.001*Ref,1.0); maxb=max(max(Glow.r,Glow.g),Glow.b); - Bloom=plant(Glow,1.0);Ref=abs(orig2-Bloom); - mx0=max(max(orig2.r,orig2.g),orig2.b)-min(min(orig2.r,orig2.g),orig2.b); - mx2=max(max(Bloom.r,Bloom.g),Bloom.b)-min(min(Bloom.r,Bloom.g),Bloom.b); - Bloom=lerp(maxb*min(Bloom,orig2),w*lerp(lerp(Glow,max(max(Ref.r,Ref.g),Ref.b)*Glow,max(mx,mx0)),lerp(color,Glow,mx2),max(mx0,mx2)*Ref),min(sqrt((1.10-mx0)*(0.10+mx2)),1.0)); - 
if(m_glow>1.5)Glow=lerp(0.5*Glow*Glow,Bloom,Bloom); - Glow=lerp(m_glow_low*Glow,m_glow_high*Bloom,pow(colmx,m_glow_dist/gamma_in)); - } - if(m_glow<0.5) - { - if(glow >=0.0)color=color+0.5*Glow*glow;else color=color+abs(glow)*min(emask*emask,1.0)*Glow;}else - { - float3 fmask= clamp(lerp(one,dmask,m_glow_mask),0.0,1.0); - color=color+abs(glow)*fmask*Glow; - } - color=min(color,1.0); - color=min(color,max(orig1,color)* lerp(one,dmask,mclip)); - color=pow(color,1.0/gamma_o); - float rc=0.6*sqrt(max(max(color.r,color.g),color.b))+0.4; - if(abs(addnoised)>0.01) - { - float3 noise0=noise(float3(floor(OptSize.xy*fuxcoord/noiseresd),float(framecount))); - if(noisetype<0.5)color=lerp(color,noise0,0.25*abs(addnoised)*rc);else - color=min(color*lerp(1.0,1.5*noise0.x,0.5*abs(addnoised)),1.0); - } - colmx=max(max(orig1.r,orig1.g),orig1.b); - color=color+bmask*lerp(emask,0.125*(1.0-colmx)*color,min(20.0*colmx,1.0)); - return float4(color*vig*humbars(lerp(pos.y,pos.x,bardir))*post_br*corner(pos0),1.0); -} - -technique CRT_Guest_HD -{ - pass Afterglow - { - VertexShader=PostProcessVS; - PixelShader=AfterglowPS; - RenderTarget=CRTHD_T1; - } - pass PreShader - { - VertexShader=PostProcessVS; - PixelShader=PreShaderPS; - RenderTarget=CRTHD_T2; - } - pass Linearize - { - VertexShader=PostProcessVS; - PixelShader=LinearizePS; - RenderTarget=CRTHD_T3; - } - pass CRT_Pass1 - { - VertexShader=PostProcessVS; - PixelShader=HD_Pass1_PS; - RenderTarget=CRTHD_T4; - } - pass GaussianX - { - VertexShader=PostProcessVS; - PixelShader=HGaussianPS; - RenderTarget=CRTHD_T5; - } - pass GaussianY - { - VertexShader=PostProcessVS; - PixelShader=VGaussianPS; - RenderTarget=CRTHD_T6; - } - pass BloomHorz - { - VertexShader=PostProcessVS; - PixelShader=BloomHorzPS; - RenderTarget=CRTHD_T7; - } - pass BloomVert - { - VertexShader=PostProcessVS; - PixelShader=BloomVertPS; - RenderTarget=CRTHD_T8; - } - pass CRT_Pass2 - { - VertexShader=PostProcessVS; - PixelShader=HD_Pass2_PS; - RenderTarget=CRTHD_T9; - } 
- pass Chromatic - { - VertexShader=PostProcessVS; - PixelShader=ChromaticPS; - } -} \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/CRT-Guest-NTSC.fx b/data/resources/shaders/reshade/Shaders/CRT-Guest-NTSC.fx deleted file mode 100644 index 5e62f1f74..000000000 --- a/data/resources/shaders/reshade/Shaders/CRT-Guest-NTSC.fx +++ /dev/null @@ -1,3879 +0,0 @@ -/* - - CRT - Guest - NTSC (Copyright (C) 2018-2024 guest(r) - guest.r@gmail.com) - - Incorporates many good ideas and suggestions from Dr. Venom. - - I would also like give thanks to many Libretro forums members for - continuous feedbacks, suggestions and caring about the shader. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hopes that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc, 59 - Temple Place - STE 330, Boston, MA 02111-1307, USA. - - Ported to ReShade by DevilSingh with some help from guest(r) - - Clean up & Duckstation specific fixes & improvements by John Novak. - Thanks to Hyllian for the help & tips. 
- -*/ - -#include "ReShade.fxh" - -// --------------------------------------------------------------------------- -// NTSC -// --------------------------------------------------------------------------- - -uniform float quality < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.0; - ui_step = 1.0; - ui_label = "Values (Info Only): SVideo = 0 | Composite = 1.0 | RF = 2.0"; - ui_category = "NTSC"; -> = 0.0; - -uniform float cust_artifacting < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "NTSC Custom Artifacting Value"; - ui_category = "NTSC"; - ui_spacing = 2; -> = 1.0; - -uniform float cust_fringing < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "NTSC Custom Fringing Value"; - ui_category = "NTSC"; -> = 1.0; - -uniform int ntsc_fields < - ui_type = "combo"; - ui_items = "Auto\0" - "No\0" - "Yes\0"; - - ui_label = "NTSC Merge Fields"; - ui_category = "NTSC"; -> = 0; - -uniform int ntsc_phase < - ui_type = "combo"; - ui_items = "Auto\0" - "2 Phase\0" - "3 Phase\0" - "Mixed\0"; - - ui_label = "NTSC Phase"; - ui_category = "NTSC"; -> = 0; - -uniform float ntsc_scale < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 2.5; - ui_step = 0.025; - ui_label = "NTSC Resolution Scaling"; - ui_category = "NTSC"; -> = 1.0; - -uniform float ntsc_taps < - ui_type = "drag"; - ui_min = 6.0; - ui_max = 32.0; - ui_step = 1.0; - ui_label = "NTSC # of Taps (Filter Width)"; - ui_category = "NTSC"; -> = 32.0; - -uniform float ntsc_cscale1 < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.00; - ui_step = 0.05; - ui_label = "NTSC Chroma Scaling/Bleeding (2 Phase)"; - ui_category = "NTSC"; - ui_spacing = 2; -> = 1.0; - -uniform float ntsc_cscale2 < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 2.25; - ui_step = 0.05; - ui_label = "NTSC Chroma Scaling/Bleeding (3 Phase)"; - ui_category = "NTSC"; -> = 1.0; - -uniform float ntsc_sat < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "NTSC 
Color Saturation"; - ui_category = "NTSC"; -> = 1.0; - -uniform float ntsc_brt < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.5; - ui_step = 0.01; - ui_label = "NTSC Brightness"; - ui_category = "NTSC"; -> = 1.0; - -uniform float ntsc_gamma < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 2.5; - ui_step = 0.025; - ui_label = "NTSC Filtering Gamma Correction"; - ui_category = "NTSC"; -> = 1.0; - -uniform float ntsc_rainbow < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.1; - ui_label = "NTSC Coloring/Rainbow Effect"; - ui_category = "NTSC"; -> = 0.0; - -uniform float ntsc_ring < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.1; - ui_label = "NTSC Anti-Ringing"; - ui_category = "NTSC"; - ui_spacing = 2; -> = 0.5; - -uniform float ntsc_shrp < - ui_type = "drag"; - ui_min = -10.0; - ui_max = 10.0; - ui_step = 0.5; - ui_label = "NTSC Sharpness (Negative: Adaptive)"; - ui_category = "NTSC"; -> = 0.0; - -uniform float ntsc_shpe < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "NTSC Sharpness Shape"; - ui_category = "NTSC"; -> = 0.75; - -uniform float CSHARPEN < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "FSharpen - Sharpen Strength"; - ui_category = "FSharpen"; -> = 0.0; - -uniform float CCONTR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "FSharpen - Sharpen Contrast/Ringing"; - ui_category = "FSharpen"; -> = 0.05; - -// --------------------------------------------------------------------------- -// FSharpen -// --------------------------------------------------------------------------- - -uniform float CDETAILS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "FSharpen - Sharpen Details"; - ui_category = "FSharpen"; -> = 1.0; - -uniform float DEBLUR < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 7.0; - ui_step = 0.25; - ui_label = "FSharpen - Deblur Strength"; - ui_category = 
"FSharpen"; -> = 1.0; - -// --------------------------------------------------------------------------- -// Persistence -// --------------------------------------------------------------------------- - -uniform float PR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'R'"; - ui_category = "Persistence"; -> = 0.32; - -uniform float PG < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'G'"; - ui_category = "Persistence"; -> = 0.32; - -uniform float PB < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - ui_label = "Persistence 'B'"; - ui_category = "Persistence"; -> = 0.32; - -uniform float AS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.6; - ui_step = 0.01; - ui_label = "Afterglow Strength"; - ui_category = "Persistence"; -> = 0.2; - -uniform float sat < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Afterglow Saturation"; - ui_category = "Persistence"; -> = 0.5; - -// --------------------------------------------------------------------------- -// Color -// --------------------------------------------------------------------------- - -uniform int CS < - ui_type = "combo"; - ui_items = "sRGB\0" - "Modern\0" - "DCI\0" - "Adobe\0" - "Rec. 
2020\0"; - - ui_label = "Display Gamut"; - ui_category = "Color"; -> = 0; - -uniform int CP < - ui_type = "combo"; - ui_items = "Off\0" - "EBU\0" - "P22\0" - "SMPTE-C\0" - "Philips\0" - "Trinitron 1\0" - "Trinitron 2\0"; - - ui_label = "CRT Profile"; - ui_category = "Color"; -> = 0; - -uniform int TNTC < - ui_type = "combo"; - ui_items = "Off\0" - "Trinitron 1\0" - "Trinitron 2\0" - "Nec MultiSync\0" - "NTSC\0"; - - ui_label = "LUT Colors"; - ui_category = "Color"; -> = 0; - -uniform float LUTLOW < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Fix LUT Dark Range"; - ui_category = "Color"; -> = 5.0; - -uniform float LUTBR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Fix LUT Brightness"; - ui_category = "Color"; -> = 1.0; - -uniform float WP < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 5.0; - ui_label = "Color Temperature %"; - ui_category = "Color"; -> = 0.0; - -uniform float wp_saturation < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Saturation Adjustment"; - ui_category = "Color"; -> = 1.0; - -uniform float clp < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Clip Saturated Color Beams"; - ui_category = "Color"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Brightness -// --------------------------------------------------------------------------- - -uniform float gamma_i < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Gamma Input"; - ui_category = "Brightness / Gamma"; -> = 2.00; - -uniform float gamma_o < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Gamma Out"; - ui_category = "Brightness / Gamma"; -> = 1.95; - -uniform float gamma_c < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Gamma Correct"; - ui_category = 
"Brightness / Gamma"; -> = 1.0; - -uniform float brightboost1 < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 10.0; - ui_step = 0.05; - ui_label = "Bright Boost Dark Pixels"; - ui_category = "Brightness / Gamma"; - ui_spacing = 2; -> = 1.4; - -uniform float brightboost2 < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 3.0; - ui_step = 0.025; - ui_label = "Bright Boost Bright Pixels"; - ui_category = "Brightness / Gamma"; -> = 1.1; - -uniform float pre_bb < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "Brightness Adjustment"; - ui_category = "Brightness / Gamma"; -> = 1.0; - -uniform float contr < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Contrast Adjustment"; - ui_category = "Brightness / Gamma"; -> = 0.0; - -uniform float sega_fix < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Sega Brightness Fix"; - ui_category = "Brightness / Gamma"; -> = 0.0; - -uniform float BP < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 25.0; - ui_step = 1.0; - ui_label = "Raise Black Level"; - ui_category = "Brightness / Gamma"; -> = 0.0; - -uniform float post_br < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 5.0; - ui_step = 0.01; - ui_label = "Post Brightness"; - ui_category = "Brightness / Gamma"; -> = 1.0; - -// --------------------------------------------------------------------------- -// Interlacing -// --------------------------------------------------------------------------- - -uniform float interr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 800.0; - ui_step = 25.0; - ui_label = "Interlace Trigger Resolution"; - ui_category = "Interlacing"; -> = 400.0; - -uniform int interm < - ui_type = "combo"; - ui_items = "Off\0" - "Normal 1\0" - "Normal 2\0" - "Normal 3\0" - "Interpolation 1\0" - "Interpolation 2\0"; - - ui_label = "Interlace Mode"; - ui_category = "Interlacing"; -> = 1; - -uniform float iscanb < - ui_type = "drag"; - ui_min = 0.0; - ui_max 
= 1.0; - ui_step = 0.05; - ui_label = "Interlacing Scanlines Effect (Interlaced Brightness)"; - ui_category = "Interlacing"; -> = 0.2; - -uniform float iscans < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Interlacing Scanlines Saturation"; - ui_category = "Interlacing"; -> = 0.25; - -uniform float hiscan < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "High Resolution Scanlines (Prepend A Scaler)"; - ui_category = "Interlacing"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Resolution -// --------------------------------------------------------------------------- - -uniform float intres < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 6.0; - ui_step = 0.5; - ui_label = "Internal Resolution Y: 0.5 | Y-Dowsample"; - ui_category = "Resolution"; -> = 0.0; - -uniform float downsample_levelx < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Downsampling-X (High-Res Content, Pre-Scalers)"; - ui_category = "Resolution"; -> = 0.0; - -uniform float downsample_levely < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Downsampling-Y (High-Res Content, Pre-Scalers)"; - ui_category = "Resolution"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Sharpness -// --------------------------------------------------------------------------- - -uniform float lsmooth < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Raster Bloom Effect Smoothing"; - ui_category = "Sharpness"; -> = 0.7; - -uniform float HSHARPNESS < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 0.05; - ui_label = "Horizontal Filter Range"; - ui_category = "Sharpness"; -> = 1.6; - -uniform float SIGMA_HOR < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 7.0; - ui_step = 0.025; - ui_label = "Horizontal Blur Sigma"; - ui_category = 
"Sharpness"; -> = 0.8; - -uniform float S_SHARPH < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "Substractive Sharpness"; - ui_category = "Sharpness"; -> = 1.2; - -uniform float HSHARP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Sharpness Definition"; - ui_category = "Sharpness"; -> = 1.2; - -uniform float HARNG < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Substractive Sharpness Ringing"; - ui_category = "Sharpness"; -> = 0.3; - -uniform float MAXS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.3; - ui_step = 0.01; - ui_label = "Maximum Sharpness"; - ui_category = "Sharpness"; -> = 0.18; - -// --------------------------------------------------------------------------- -// Glow -// --------------------------------------------------------------------------- - -uniform int m_glow < - ui_type = "combo"; - ui_items = "Ordinary Glow\0" - "Magic Glow 1\0" - "Magic Glow 2\0"; - - ui_label = "Glow Type"; - ui_category = "Glow"; -> = 0; - -uniform float m_glow_cutoff < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.4; - ui_step = 0.01; - ui_label = "Magic Glow Cutoff"; - ui_category = "Glow"; -> = 0.12; - -uniform float m_glow_low < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 7.0; - ui_step = 0.05; - ui_label = "Magic Glow Low Strength"; - ui_category = "Glow"; -> = 0.35; - -uniform float m_glow_high < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 7.0; - ui_step = 0.1; - ui_label = "Magic Glow High Strength"; - ui_category = "Glow"; -> = 5.0; - -uniform float m_glow_dist < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Magic Glow Distribution"; - ui_category = "Glow"; -> = 1.0; - -uniform float m_glow_mask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Magic Glow Mask Strength"; - ui_category = "Glow"; -> = 1.0; - -uniform float FINE_GAUSS < - ui_type = "drag"; - ui_min = 
1.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "Fine (Magic) Glow Sampling"; - ui_category = "Glow"; -> = 1.0; - -uniform float SIZEH < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Horizontal Glow Radius"; - ui_category = "Glow"; - ui_spacing = 2; -> = 6.0; - -uniform float SIGMA_H < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 15.0; - ui_step = 0.05; - ui_label = "Horizontal Glow Sigma"; - ui_category = "Glow"; -> = 1.2; - -uniform float SIZEV < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Vertical Glow Radius"; - ui_category = "Glow"; -> = 6.0; - -uniform float SIGMA_V < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 15.0; - ui_step = 0.05; - ui_label = "Vertical Glow Sigma"; - ui_category = "Glow"; -> = 1.2; - -uniform float glow < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "(Magic) Glow Strength"; - ui_category = "Glow"; - ui_spacing = 2; -> = 0.08; - -// --------------------------------------------------------------------------- -// Bloom / Halation -// --------------------------------------------------------------------------- - -uniform float FINE_BLOOM < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "Fine Bloom/Halation Sampling"; - ui_category = "Bloom / Halation"; -> = 1.0; - -uniform float SIZEX < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Horizontal Bloom/Halation Radius"; - ui_category = "Bloom / Halation"; -> = 3.0; - -uniform float SIGMA_X < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 15.0; - ui_step = 0.025; - ui_label = "Horizontal Bloom/Halation Sigma"; - ui_category = "Bloom / Halation"; -> = 0.75; - -uniform float SIZEY < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Vertical Bloom/Halation Radius"; - ui_category = "Bloom / Halation"; -> = 3.0; - -uniform float SIGMA_Y < - ui_type = "drag"; - ui_min = 
0.25; - ui_max = 15.0; - ui_step = 0.025; - ui_label = "Vertical Bloom/Halation Sigma"; - ui_category = "Bloom / Halation"; -> = 0.60; - -uniform float blm_1 < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Bloom Strength"; - ui_category = "Bloom / Halation"; - ui_spacing = 2; -> = 0.0; - -uniform float b_mask < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Bloom Mask Strength"; - ui_category = "Bloom / Halation"; -> = 0.0; - -uniform float mask_bloom < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Mask Bloom"; - ui_category = "Bloom / Halation"; -> = 0.0; - -uniform float bloom_dist < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "Bloom Distribution"; - ui_category = "Bloom / Halation"; -> = 0.0; - -uniform float halation < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Halation Strength"; - ui_category = "Bloom / Halation"; - ui_spacing = 2; -> = 0.0; - -uniform float h_mask < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Halation Mask Strength"; - ui_category = "Bloom / Halation"; -> = 0.5; - -// --------------------------------------------------------------------------- -// Scanlines -// --------------------------------------------------------------------------- - -uniform int gsl < - ui_type = "combo"; - ui_items = "Soft\0" - "Normal\0" - "Strong\0" - "Stronger\0"; - - ui_label = "Scanlines Type"; - ui_category = "Scanlines"; -> = 0; - -uniform float scanline1 < - ui_type = "drag"; - ui_min = -20.0; - ui_max = 40.0; - ui_step = 0.5; - ui_label = "Scanlines Beam Shape Center"; - ui_category = "Scanlines"; -> = 6.0; - -uniform float scanline2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 70.0; - ui_step = 1.0; - ui_label = "Scanlines Beam Shape Edges"; - ui_category = "Scanlines"; -> = 8.0; - -uniform float beam_min < - ui_type = 
"drag"; - ui_min = 0.25; - ui_max = 10.0; - ui_step = 0.05; - ui_label = "Scanlines Shape Dark Pixels"; - ui_category = "Scanlines"; -> = 1.3; - -uniform float beam_max < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 3.5; - ui_step = 0.025; - ui_label = "Scanlines Shape Bright Pixels"; - ui_category = "Scanlines"; -> = 1.0; - -uniform bool tds < - ui_type = "radio"; - ui_label = "Thinner Dark Scanlines"; - ui_category = "Scanlines"; -> = false; - -uniform float beam_size < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Increased Bright Scanlines Beam"; - ui_category = "Scanlines"; -> = 0.6; - -uniform float scans < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 6.0; - ui_step = 0.1; - ui_label = "Scanlines Saturation / Mask Falloff"; - ui_category = "Scanlines"; -> = 0.5; - -uniform float scan_falloff < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 2.0; - ui_step = 0.025; - ui_label = "Scanlines Falloff"; - ui_category = "Scanlines"; -> = 1.0; - -uniform float spike < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Scanlines Spike Removal"; - ui_category = "Scanlines"; -> = 1.0; - -uniform float ssharp < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.3; - ui_step = 0.01; - ui_label = "Smart Sharpen Scanlines"; - ui_category = "Scanlines"; -> = 0.0; - -uniform float scangamma < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Scanlines Gamma"; - ui_category = "Scanlines"; -> = 2.4; - -uniform float no_scanlines < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.5; - ui_step = 0.05; - ui_label = "No-Scanlines Mode"; - ui_category = "Scanlines"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Scaling -// --------------------------------------------------------------------------- - -uniform float IOS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Integer Scaling: Odd:Y | 
Even:X+Y"; - ui_category = "Scaling"; -> = 0.0; - -uniform float overscanx < - ui_type = "drag"; - ui_min = -200.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Overscan X Original Pixels"; - ui_category = "Scaling"; -> = 0.0; - -uniform float overscany < - ui_type = "drag"; - ui_min = -200.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Overscan Y Original Pixels"; - ui_category = "Scaling"; -> = 0.0; - -uniform float OS < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Raster Bloom Overscan Mode"; - ui_category = "Scaling"; -> = 1.0; - -uniform float blm_2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 20.0; - ui_step = 1.0; - ui_label = "Raster Bloom %"; - ui_category = "Scaling"; -> = 0.0; - -uniform int shadow_mask < - ui_type = "combo"; - ui_items = "Off\0" - "CGWG\0" - "Lottes TV\0" - "Lottes Aperture\0" - "Lottes Stretched VGA\0" - "Lottes VGA\0" - "Trinitron 1\0" - "Trinitron 2\0" - "Trinitron B/W 1\0" - "Trinitron B/W 2\0" - "Trinitron Magenta/Green/Black\0" - "Trinitron RGBX\0" - "Trinitron 4k 1\0" - "Trinitron RRGGBBX\0" - "Trinitron 4k 2\0"; - - ui_label = "CRT Mask"; - ui_category = "Mask"; -> = 1; - -uniform float maskstr < - ui_type = "drag"; - ui_min = -0.5; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Mask Strength (1, 6-14)"; - ui_category = "Mask"; -> = 0.3; - -uniform float mcut < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Mask 6-14 Low Strength"; - ui_category = "Mask"; -> = 1.1; - -uniform float maskboost < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "CRT Mask Boost"; - ui_category = "Mask"; -> = 1.0; - -uniform float masksize < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "CRT Mask Size"; - ui_category = "Mask"; -> = 1.0; - -uniform float mask_zoom < - ui_type = "drag"; - ui_min = -5.0; - ui_max = 5.0; - ui_step = 1.0; - ui_label = "CRT Mask Zoom (+ Mask Width)"; - ui_category = 
"Mask"; -> = 0.0; - -uniform float zoom_mask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "CRT Mask Zoom Sharpen"; - ui_category = "Mask"; -> = 0.0; - -uniform float mshift < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.5; - ui_label = "(Transform to) Shadow Mask"; - ui_category = "Mask"; -> = 0.0; - -uniform int mask_layout < - ui_type = "combo"; - ui_items = "RGB\0" - "BGR\0"; - - ui_label = "Mask Layout (Check LCD Panel)"; - ui_category = "Mask"; -> = 0; - -uniform float mask_drk < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Lottes Mask Dark"; - ui_category = "Mask"; - ui_spacing = 2; -> = 0.5; - -uniform float mask_lgt < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Lottes Mask Bright"; - ui_category = "Mask"; -> = 1.5; - -uniform float mask_gamma < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Mask Gamma"; - ui_category = "Mask"; - ui_spacing = 2; -> = 2.4; - -uniform float slotmask1 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Slot Mask Strength Bright Pixels"; - ui_category = "Mask"; - ui_spacing = 2; -> = 0.0; - -uniform float slotmask2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Slot Mask Strength Dark Pixels"; - ui_category = "Mask"; -> = 0.0; - -uniform float slotwidth < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 16.0; - ui_step = 1.0; - ui_label = "Slot Mask Width (0:Auto)"; - ui_category = "Mask"; -> = 0.0; - -uniform float double_slot < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Slot Mask Height: 2x1 or 4x1"; - ui_category = "Mask"; -> = 2.0; - -uniform float slotms < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 1.0; - ui_label = "Slot Mask Thickness"; - ui_category = "Mask"; -> = 1.0; - -uniform bool smoothmask < - ui_type = "radio"; 
- ui_label = "Smooth Masks In Bright Scanlines"; - ui_category = "Mask"; - ui_spacing = 2; -> = false; - -uniform float smask_mit < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Mitigate Slot Mask Interaction"; - ui_category = "Mask"; -> = 0.0; - -uniform float bmask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Base (Black) Mask Strength"; - ui_category = "Mask"; -> = 0.0; - -uniform float mclip < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Preserve Mask Strength"; - ui_category = "Mask"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Vignette / Border -// --------------------------------------------------------------------------- - -uniform float vigstr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Vignette Strength"; - ui_category = "Vignette / Border"; -> = 0.0; - -uniform float vigdef < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "Vignette Size"; - ui_category = "Vignette / Border"; -> = 1.0; - -uniform float csize < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.005; - ui_label = "Corner Size"; - ui_category = "Vignette / Border"; -> = 0.0; - -uniform float bsize < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.01; - ui_label = "Border Size"; - ui_category = "Vignette / Border"; -> = 0.01; - -uniform float sborder < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Border Intensity"; - ui_category = "Vignette / Border"; -> = 0.75; - -// --------------------------------------------------------------------------- -// Curvature -// --------------------------------------------------------------------------- - -uniform float warpx < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature X"; - ui_category = 
"Curvature"; -> = 0.0; - -uniform float warpy < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature Y"; - ui_category = "Curvature"; -> = 0.0; - -uniform float c_shape < - ui_type = "drag"; - ui_min = 0.05; - ui_max = 0.6; - ui_step = 0.05; - ui_label = "Curvature Shape"; - ui_category = "Curvature"; -> = 0.25; - -// --------------------------------------------------------------------------- -// Deconvergence -// --------------------------------------------------------------------------- - -uniform float dctypex < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.75; - ui_step = 0.05; - ui_label = "Deconvergence Type X: 0:Static | Other:Dynamic"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float dctypey < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.75; - ui_step = 0.05; - ui_label = "Deconvergence Type Y: 0:Static | Other:Dynamic"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float deconrx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'R' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float decongx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'G' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float deconbx < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Horizontal Deconvergence 'B' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float deconry < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Vertical Deconvergence 'R' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float decongy < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 0.25; - ui_label = "Vertical Deconvergence 'G' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float deconby < - ui_type = "drag"; - ui_min = -15.0; - ui_max = 15.0; - 
ui_step = 0.25; - ui_label = "Vertical Deconvergence 'B' Range"; - ui_category = "Deconvergence"; -> = 0.0; - -uniform float decons < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "Deconvergence Strength"; - ui_category = "Deconvergence"; -> = 1.0; - -// --------------------------------------------------------------------------- -// Noise -// --------------------------------------------------------------------------- - -uniform float barspeed < - ui_type = "drag"; - ui_min = 5.0; - ui_max = 200.0; - ui_step = 1.0; - ui_label = "Hum Bar Speed"; - ui_category = "Hum Bar"; -> = 50.0; - -uniform float barintensity < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Hum Bar Intensity"; - ui_category = "Hum Bar"; -> = 0.0; - -uniform float bardir < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Hum Bar Direction"; - ui_category = "Hum Bar"; -> = 0.0; - -// --------------------------------------------------------------------------- -// Noise -// --------------------------------------------------------------------------- - -uniform float addnoised < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.02; - ui_label = "Add Noise"; - ui_category = "Noise"; -> = 0.0; - -uniform float noiseresd < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 10.0; - ui_step = 1.0; - ui_label = "Noise Resolution"; - ui_category = "Noise"; -> = 2.0; - -uniform int noisetype < - ui_type = "combo"; - ui_items = "Colored\0" - "Luma\0"; - - ui_label = "Noise Type"; - ui_category = "Noise"; -> = 0; - - -uniform float FrameCount < source = "framecount"; >; - -uniform float NativeWidth < source = "nativewidth"; >; -uniform float NativeHeight < source = "nativeheight"; >; -uniform float InternalWidth < source = "internalwidth"; >; -uniform float InternalHeight < source = "internalheight"; >; -uniform float BufferWidth < source = "bufferwidth"; >; -uniform float BufferHeight < source 
= "bufferheight"; >; - -uniform float ViewportX < source = "viewportx"; >; -uniform float ViewportY < source = "viewporty"; >; -uniform float2 ViewportOffset < source = "viewportoffset"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float ViewportHeight < source = "viewportheight"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; - -// InternalSize / NativeSize -uniform float UpscaleMultiplier < source = "upscale_multiplier"; >; - -// ViewportSize / InternalSize -uniform float2 InternalPixelSize < source = "internal_pixel_size"; >; - -// ViewportSize / InternalSize / BufferSize -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; - -// ViewportSize / NativeSize -uniform float2 NativePixelSize < source = "native_pixel_size"; >; - -// ViewportSize / NativeSize / BufferSize -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; - -// BufferSize / ViewportSize -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; - -#ifndef Resolution_X -#define Resolution_X BUFFER_WIDTH -#endif - -#ifndef Resolution_Y -#define Resolution_Y BUFFER_HEIGHT -#endif - -#define SIGNAL1 float2(4.0 * Resolution_X, Resolution_Y) -#define SIGNAL2 float2(2.0 * Resolution_X, Resolution_Y) - -#define OutputSize float4(BUFFER_SCREEN_SIZE, 1.0 / BUFFER_SCREEN_SIZE) - -#define TextureSize (1.0 / NormalizedNativePixelSize) -#define OriginalSize float4(TextureSize, 1.0 / TextureSize) - -#define InputSize float2(800.00000000, 600.00000000) -#define SourceSize float4(InputSize, 1.0 / InputSize) - -#define fuxcoord (texcoord * 1.00001) -#define scans 1.5 * scans -#define eps 1e-8 -#define pii 3.14159265 -#define fracoord (fuxcoord * OutputSize.xy) - -#define COMPAT_TEXTURE(c, d) tex2D(c, d) - -#define NTSC_01 float4(SIGNAL1, 1.0 / SIGNAL1) -#define NTSC_02 float4(SIGNAL2, 1.0 / SIGNAL2) - -#define mix_m float3x3(BRIGHTNESS, ARTIFACT, ARTIFACT, \ - FRINGING, 
2.0*SATURATION, 0.0, \ - FRINGING, 0.0, 2.0*SATURATION) - -#define rgb_m float3x3(0.299 , 0.587, 0.114, \ - 0.596, -0.274, -0.322, \ - 0.211, -0.523, 0.312) - -#define yiq_m float3x3(1.000, 0.956, 0.621, \ - 1.000, -0.272, -0.647, \ - 1.000, -1.106, 1.703) - -#define tex_1 texcoord - float2(0.25 * OriginalSize.z / 4.0, 0.0) -#define tex_2 texcoord - float2(0.25 * OriginalSize.z / 4.0, 0.0) - -#define inv_sqr_h 1.0 / (2.0 * SIGMA_H * SIGMA_H) -#define inv_sqr_v 1.0 / (2.0 * SIGMA_V * SIGMA_V) -#define inv_sqr_x 1.0 / (2.0 * SIGMA_X * SIGMA_X) -#define inv_sqr_y 1.0 / (2.0 * SIGMA_Y * SIGMA_Y) - -#define fetch_offset1(dx) tex2D(NTSC_S03, tex_1 + dx).xyz + \ - tex2D(NTSC_S03, tex_1 - dx).xyz - -#define fetch_offset2(dx) float3(tex2D(NTSC_S03, tex_1 + dx.xz).x + \ - tex2D(NTSC_S03, tex_1 - dx.xz).x, \ - tex2D(NTSC_S03, tex_1 + dx.yz).yz + \ - tex2D(NTSC_S03, tex_1 - dx.yz).yz) - -#define NTSC_S00 ReShade::BackBuffer - -texture NTSC_T01 -{ - Width = 1.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA32F; -}; -sampler NTSC_S01 -{ - Texture = NTSC_T01; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = POINT; - MinFilter = POINT; - MipFilter = POINT; -}; - -texture NTSC_T02 -{ - Width = 1.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S02 -{ - Texture = NTSC_T02; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = POINT; - MinFilter = POINT; - MipFilter = POINT; -}; - -texture NTSC_T03 -{ - Width = 4.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S03 -{ - Texture = NTSC_T03; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T04 -{ - Width = 2.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S04 -{ - Texture = NTSC_T04; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = 
LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T05 -{ - Width = 2.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S05 -{ - Texture = NTSC_T05; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T06 -{ - Width = 2.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S06 -{ - Texture = NTSC_T06; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T07 -{ - Width = 2.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S07 -{ - Texture = NTSC_T07; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T08 -{ - Width = 2.0 * Resolution_X; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S08 -{ - Texture = NTSC_T08; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T09 -{ - Width = 1.0 * BUFFER_WIDTH; - Height = Resolution_Y; - Format = RGBA16F; -}; -sampler NTSC_S09 -{ - Texture = NTSC_T09; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T10 -{ - Width = 1.0 * 800.00000000; - Height = 600.00000000; - Format = RGBA16F; -}; -sampler NTSC_S10 -{ - Texture = NTSC_T10; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T11 -{ - Width = 1.0 * 800.00000000; - Height = 600.00000000; - Format = RGBA16F; -}; -sampler NTSC_S11 -{ - Texture = NTSC_T11; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T12 
-{ - Width = 1.0 * 800.00000000; - Height = 600.00000000; - Format = RGBA16F; -}; -sampler NTSC_S12 -{ - Texture = NTSC_T12; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T13 -{ - Width = 1.0 * 800.00000000; - Height = 600.00000000; - Format = RGBA16F; -}; -sampler NTSC_S13 -{ - Texture = NTSC_T13; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_T14 -{ - Width = 1.0 * BUFFER_WIDTH; - Height = BUFFER_HEIGHT; - Format = RGBA16F; -}; -sampler NTSC_S14 -{ - Texture = NTSC_T14; - AddressU = BORDER; - AddressV = BORDER; - AddressW = BORDER; - MagFilter = LINEAR; - MinFilter = LINEAR; - MipFilter = LINEAR; -}; - -texture NTSC_001 < source = "CRT-LUT-1.png"; > -{ - Width = 1024; - Height = 32; -}; -sampler NTSC_L01 -{ - Texture = NTSC_001; -}; - -texture NTSC_002 < source = "CRT-LUT-2.png"; > -{ - Width = 1024; - Height = 32; -}; -sampler NTSC_L02 -{ - Texture = NTSC_002; -}; - -texture NTSC_003 < source = "CRT-LUT-3.png"; > { - Width = 1024; - Height = 32; -}; -sampler NTSC_L03 -{ - Texture = NTSC_003; -}; - -texture NTSC_004 < source = "CRT-LUT-4.png"; > -{ - Width = 1024; - Height = 32; -}; -sampler NTSC_L04 -{ - Texture = NTSC_004; -}; - -float3 fix_lut(float3 lut, float3 ref) -{ - float r = length(ref); - float l = length(lut); - float m = max(max(ref.r, ref.g), ref.b); - ref = normalize(lut + 0.0000001) * lerp(r, l, pow(m, 1.25)); - return lerp(lut, ref, LUTBR); -} - -float vignette(float2 pos) -{ - float2 b = vigdef * float2(1.0, ViewportWidth / ViewportHeight) * 0.125; - pos = clamp(pos, 0.0, 1.0); - pos = abs(2.0 * (pos - 0.5)); - float2 res = lerp(0.0.xx, 1.0.xx, smoothstep(1.0.xx, 1.0.xx - b, sqrt(pos))); - res = pow(res, 0.70.xx); - return max(lerp(1.0, sqrt(res.x * res.y), vigstr), 0.0); -} - -float contrast(float x) -{ - return max(lerp(x, smoothstep(0.0, 
1.0, x), contr), 0.0); -} - -float dist(float3 A, float3 B) -{ - float r = 0.5 * (A.r + B.r); - float3 d = A - B; - float3 c = float3(2. + r, 4., 3. - r); - return sqrt(dot(c * d, d)) / 3.; -} - -float3 plant(float3 tar, float r) -{ - float t = max(max(tar.r, tar.g), tar.b) + 0.00001; - return tar * r / t; -} - -float3 fetch_pixel(float2 coord) -{ - float2 dx = float2(NTSC_02.z, 0.0) * downsample_levelx * ViewportWidth / NativeWidth; - float2 dy = float2(0.0, NTSC_02.w) * downsample_levely * ViewportHeight / NativeHeight; - float2 d1 = dx + dy; - float2 d2 = dx - dy; - float sum = 15.0; - - float3 result = 3.0 * COMPAT_TEXTURE(NTSC_S06, coord).rgb + - 2.0 * COMPAT_TEXTURE(NTSC_S06, coord + dx).rgb + - 2.0 * COMPAT_TEXTURE(NTSC_S06, coord - dx).rgb + - 2.0 * COMPAT_TEXTURE(NTSC_S06, coord + dy).rgb + - 2.0 * COMPAT_TEXTURE(NTSC_S06, coord - dy).rgb + - COMPAT_TEXTURE(NTSC_S06, coord + d1).rgb + - COMPAT_TEXTURE(NTSC_S06, coord - d1).rgb + - COMPAT_TEXTURE(NTSC_S06, coord + d2).rgb + - COMPAT_TEXTURE(NTSC_S06, coord - d2).rgb; - - return result / sum; -} - -float crthd_h(float x, float y) -{ - float invsigmah = 1.0 / (2.0 * SIGMA_HOR * SIGMA_HOR * y * y); - return exp(-x * x * invsigmah); -} - -float gauss_h(float x) -{ - return exp(-x * x * inv_sqr_h); -} - -float gauss_v(float x) -{ - return exp(-x * x * inv_sqr_v); -} - -float bloom_h(float x) -{ - return exp(-x * x * inv_sqr_x); -} - -float bloom_v(float x) -{ - return exp(-x * x * inv_sqr_y); -} - -float mod(float x,float y) -{ - return x-y* floor(x/y); -} - -float st0(float x) -{ - return exp2(-10.0 * x * x); -} - -float st1(float x) -{ - return exp2(-8.0 * x * x); -} - -float3 sw0(float x, float color, float scanline, float3 c) -{ - float3 xe = lerp(1.0.xxx + scans, 1.0.xxx, c); - float tmp = lerp(beam_min, beam_max, color); - float ex = x * tmp; - ex = (gsl > 0) ? 
ex * ex : lerp(ex * ex, ex * ex * ex, 0.4); - return exp2(-scanline * ex * xe); -} - -float3 sw1(float x, float color, float scanline, float3 c) -{ - float3 xe = lerp(1.0.xxx + scans, 1.0.xxx, c); - x = lerp(x, beam_min * x, max(x - 0.4 * color, 0.0)); - float tmp = lerp(1.2 * beam_min, beam_max, color); - float ex = x * tmp; - return exp2(-scanline * ex * ex * xe); -} - -float3 sw2(float x, float color, float scanline, float3 c) -{ - float3 xe = lerp(1.0.xxx + scans, 1.0.xxx, c); - float tmp = lerp((2.5 - 0.5 * color) * beam_min, beam_max, color); - tmp = lerp(beam_max, tmp, pow(x, color + 0.3)); - float ex = x * tmp; - return exp2(-scanline * ex * ex * xe); -} - -float2 overscan(float2 pos, float dx, float dy) -{ - pos = pos * 2.0 - 1.0; - pos *= float2(dx, dy); - return pos * 0.5 + 0.5; -} - -float2 warp(float2 pos) -{ - pos = pos * 2.0 - 1.0; - pos = lerp(pos, - float2(pos.x * rsqrt(1.0 - c_shape * pos.y * pos.y), - pos.y * rsqrt(1.0 - c_shape * pos.x * pos.x)), - float2(warpx, warpy) / c_shape); - - return pos * 0.5 + 0.5; -} - -float3 gc(float3 c) -{ - float mc = max(max(c.r, c.g), c.b); - float mg = pow(mc, 1.0 / gamma_c); - return c * mg / (mc + eps); -} - -float3 rgb2yiq(float3 r) -{ - return mul(rgb_m, r); -} - -float3 yiq2rgb(float3 y) -{ - return mul(yiq_m, y); -} - -float get_luma(float3 c) -{ - return dot(c, float3(0.2989, 0.5870, 0.1140)); -} - -float3 crt_mask(float2 pos, float mx, float mb) -{ - float3 mask = mask_drk; - float3 one = 1.0; - - // CGWG - if (shadow_mask == 1) { - float mc = 1.0 - max(maskstr, 0.0); - pos.x = frac(pos.x * 0.5); - - if (pos.x < 0.49) { - mask.r = 1.0; - mask.g = mc; - mask.b = 1.0; - } else { - mask.r = mc; - mask.g = 1.0; - mask.b = mc; - } - - // Lottes - Very compressed TV style shadow mask - } else if (shadow_mask == 2) { - float lane = mask_lgt; - float odd = 0.0; - - if (frac(pos.x / 6.0) < 0.49) { - odd = 1.0; - } - if (frac((pos.y + odd) / 2.0) < 0.49) { - lane = mask_drk; - } - - pos.x = floor(mod(pos.x, 
3.0)); - - if (pos.x < 0.5) mask.r = mask_lgt; - else if (pos.x < 1.5) mask.g = mask_lgt; - else mask.b = mask_lgt; - - mask *= lane; - - // Lottes - Aperture-grille - } else if (shadow_mask == 3) { - pos.x = floor(mod(pos.x, 3.0)); - - if (pos.x < 0.5) mask.r = mask_lgt; - else if (pos.x < 1.5) mask.g = mask_lgt; - else mask.b = mask_lgt; - - // Lottes - Stretched VGA style shadow mask (same as prior shaders) - } else if (shadow_mask == 4) { - pos.x += pos.y * 3.0; - pos.x = frac(pos.x / 6.0); - - if (pos.x < 0.3) mask.r = mask_lgt; - else if (pos.x < 0.6) mask.g = mask_lgt; - else mask.b = mask_lgt; - - // Lottes - VGA style shadow mask - } else if (shadow_mask == 5) { - pos.xy = floor(pos.xy * float2(1.0, 0.5)); - pos.x += pos.y * 3.0; - pos.x = frac(pos.x / 6.0); - - if (pos.x < 0.3) mask.r = mask_lgt; - else if (pos.x < 0.6) mask.g = mask_lgt; - else mask.b = mask_lgt; - - // Trinitron mask 1 - } else if (shadow_mask == 6) { - mask = 0.0; - pos.x = frac(pos.x / 2.0); - - if (pos.x < 0.49) { - mask.r = 1.0; - mask.b = 1.0; - } else { - mask.g = 1.0; - } - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron mask 2 - } else if (shadow_mask == 7) { - mask = 0.0; - pos.x = floor(mod(pos.x, 3.0)); - - if (pos.x < 0.5) mask.r = 1.0; - else if (pos.x < 1.5) mask.g = 1.0; - else mask.b = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron B/W mask 1 - } else if (shadow_mask == 8) { - mask = 0.0; - pos.x = frac(pos.x / 2.0); - - if (pos.x < 0.49) mask = 0.0.xxx; - else mask = 1.0.xxx; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron B/W mask 2 - } else if (shadow_mask == 9) { - mask = 0.0; - pos.x = frac(pos.x / 3.0); - - if (pos.x < 0.3) mask = 0.0.xxx; - else if (pos.x < 0.6) mask = 1.0.xxx; - else mask = 1.0.xxx; - - mask = clamp(lerp(lerp(one, mask, mcut), lerp(one, mask, maskstr), mx), - 0.0, 
1.0); - - // Trinitron Magenta - Green - Black mask - } else if (shadow_mask == 10) { - mask = 0.0; - pos.x = frac(pos.x / 3.0); - - if (pos.x < 0.3) mask = 0.0.xxx; - else if (pos.x < 0.6) mask.rb = 1.0.xx; - else mask.g = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron RGBX mask - } else if (shadow_mask == 11) { - mask = 0.0; - pos.x = frac(pos.x * 0.25); - - if (pos.x < 0.2) mask = 0.0.xxx; - else if (pos.x < 0.4) mask.r = 1.0; - else if (pos.x < 0.7) mask.g = 1.0; - else mask.b = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron 4k mask 1 - } else if (shadow_mask == 12) { - mask = 0.0; - pos.x = frac(pos.x * 0.25); - - if (pos.x < 0.2) mask.r = 1.0; - else if (pos.x < 0.4) mask.rg = 1.0.xx; - else if (pos.x < 0.7) mask.gb = 1.0.xx; - else mask.b = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron RRGGBBX mask - } else if (shadow_mask == 13) { - mask = 0.0; - pos.x = floor(mod(pos.x, 7.0)); - - if (pos.x < 0.5) mask = 0.0.xxx; - else if (pos.x < 2.5) mask.r = 1.0; - else if (pos.x < 4.5) mask.g = 1.0; - else mask.b = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - - // Trinitron 4k mask 2 - } else { - mask = 0.0; - pos.x = floor(mod(pos.x, 6.0)); - - if (pos.x < 0.5) mask = 0.0.xxx; - else if (pos.x < 1.5) mask.r = 1.0; - else if (pos.x < 2.5) mask.rg = 1.0.xx; - else if (pos.x < 3.5) mask.rgb = 1.0.xxx; - else if (pos.x < 4.5) mask.gb = 1.0.xx; - else mask.b = 1.0; - - mask = clamp(lerp(lerp(one, mask, mcut), - lerp(one, mask, maskstr), mx), - 0.0, 1.0); - } - - if (mask_layout > 0.5) { - mask = mask.rbg; - } - - float maskmin = min(min(mask.r, mask.g), mask.b); - - return (mask - maskmin) * (1.0 + (maskboost - 1.0) * mb) + maskmin; -} - -float slt_mask(float2 pos, float m, float swidth) -{ - if ((slotmask1 + slotmask2) == 
0.0) { - return 1.0; - } else { - pos.y = floor(pos.y / slotms); - - float mlen = swidth * 2.0; - - float px = floor(mod(pos.x, 0.99999 * mlen)); - float py = floor(frac(pos.y / (2.0 * double_slot)) * 2.0 * double_slot); - - float slot_dark = lerp(1.0 - slotmask2, 1.0 - slotmask1, m); - float slot = 1.0; - - if (py == 0.0 && px < swidth) { - slot = slot_dark; - } else if (py == double_slot && px >= swidth) { - slot = slot_dark; - } - return slot; - } -} - -float humbars(float pos) -{ - if (barintensity == 0.0) { - return 1.0; - } else { - pos = (barintensity >= 0.0) ? pos : (1.0 - pos); - pos = frac(pos + mod(FrameCount, barspeed) / (barspeed - 1.0)); - pos = (barintensity < 0.0) ? pos : (1.0 - pos); - - return (1.0 - barintensity) + barintensity * pos; - } -} - -float corner(float2 pos) -{ - float vp_ratio = ViewportWidth / ViewportHeight; - float2 bc = bsize * float2(1.0, vp_ratio) * 0.05; - - pos = clamp(pos, 0.0, 1.0); - pos = abs(2.0 * (pos - 0.5)); - - float csz = lerp(400.0, 7.0, pow(4.0 * csize, 0.10)); - float crn = dot(pow(pos, csz.xx * float2(1.0, 1.0 / vp_ratio)), 1.0.xx); - crn = (csize == 0.0) ? max(pos.x, pos.y) : pow(crn, 1.0 / csz); - - pos = max(pos, crn); - - float2 rs = (bsize == 0.0) ? 
1.0.xx - : lerp(0.0.xx, 1.0.xx, - smoothstep(1.0.xx, 1.0.xx - bc, sqrt(pos))); - - rs = pow(rs, sborder.xx); - - return sqrt(rs.x * rs.y); -} - -float3 declip(float3 c, float b) -{ - float m = max(max(c.r, c.g), c.b); - if (m > b) { - c = c * b / m; - } - return c; -} - -float igc(float mc) -{ - return pow(mc, gamma_c); -} - -float3 noise(float3 v) -{ - if (addnoised < 0.0) { - v.z = -addnoised; - } else { - v.z = mod(v.z, 6001.0) / 1753.0; - } - v = frac(v) + frac(v * 1e4) + frac(v * 1e-4); - v += float3(0.12345, 0.6789, 0.314159); - v = frac(v * dot(v, v) * 123.456); - v = frac(v * dot(v, v) * 123.456); - v = frac(v * dot(v, v) * 123.456); - v = frac(v * dot(v, v) * 123.456); - return v; -} - -void bring_pixel(inout float3 c, inout float3 b, inout float3 g, float2 coord, - float2 boord) -{ - float stepx = OutputSize.z; - float stepy = OutputSize.w; - - float2 dx = float2(stepx, 0.0); - float2 dy = float2(0.0, stepy); - - float posx = 2.0 * coord.x - 1.0; - float posy = 2.0 * coord.y - 1.0; - - if (dctypex > 0.025) { - posx = sign(posx) * pow(abs(posx), 1.05 - dctypex); - dx = posx * dx; - } - if (dctypey > 0.025) { - posy = sign(posy) * pow(abs(posy), 1.05 - dctypey); - dy = posy * dy; - } - - float2 rc = deconrx * dx + deconry * dy; - float2 gc = decongx * dx + decongy * dy; - float2 bc = deconbx * dx + deconby * dy; - - float r1 = COMPAT_TEXTURE(NTSC_S14, coord + rc).r; - float g1 = COMPAT_TEXTURE(NTSC_S14, coord + gc).g; - float b1 = COMPAT_TEXTURE(NTSC_S14, coord + bc).b; - - float ds = decons; - float3 d = float3(r1, g1, b1); - - c = clamp(lerp(c, d, ds), 0.0, 1.0); - r1 = COMPAT_TEXTURE(NTSC_S13, boord + rc).r; - g1 = COMPAT_TEXTURE(NTSC_S13, boord + gc).g; - b1 = COMPAT_TEXTURE(NTSC_S13, boord + bc).b; - d = float3(r1, g1, b1); - - b = g = lerp(b, d, min(ds, 1.0)); - - r1 = COMPAT_TEXTURE(NTSC_S11, boord + rc).r; - g1 = COMPAT_TEXTURE(NTSC_S11, boord + gc).g; - b1 = COMPAT_TEXTURE(NTSC_S11, boord + bc).b; - d = float3(r1, g1, b1); - g = lerp(g, d, min(ds, 
1.0)); -} - -float4 AfterglowPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float2 dx = float2(OriginalSize.z, 0.0); - float2 dy = float2(0.0, OriginalSize.w); - - float w = 1.0; - - float2 tc = texcoord + float2(mod(ViewportX + 1.0, 2.0) * 1.0 / BufferWidth, - mod(ViewportY + 1.0, 2.0) * 1.0 / BufferHeight); - - float3 color0 = COMPAT_TEXTURE(NTSC_S00, tc.xy).rgb; - float3 color1 = COMPAT_TEXTURE(NTSC_S00, tc.xy - dx).rgb; - float3 color2 = COMPAT_TEXTURE(NTSC_S00, tc.xy + dx).rgb; - float3 color3 = COMPAT_TEXTURE(NTSC_S00, tc.xy - dy).rgb; - float3 color4 = COMPAT_TEXTURE(NTSC_S00, tc.xy + dy).rgb; - - float3 clr = (2.5 * color0 + color1 + color2 + color3 + color4) / 6.5; - float3 a = COMPAT_TEXTURE(NTSC_S01, texcoord.xy).rgb; - - if ((color0.r + color0.g + color0.b < 5.0 / 255.0)) { - w = 0.0; - } - - float3 result = lerp(max(lerp(clr, a, 0.49 + float3(PR, PG, PB)) - 1.25 / 255.0, - 0.0), - clr, - w); - - return float4(result, w); -} - -float4 PreShaderPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - const float3x3 Profile0 = float3x3(0.412391, 0.212639, 0.019331, - 0.357584, 0.715169, 0.119195, - 0.180481, 0.072192, 0.950532); - - const float3x3 Profile1 = float3x3(0.430554, 0.222004, 0.020182, - 0.341550, 0.706655, 0.129553, - 0.178352, 0.071341, 0.939322); - - const float3x3 Profile2 = float3x3(0.396686, 0.210299, 0.006131, - 0.372504, 0.713766, 0.115356, - 0.181266, 0.075936, 0.967571); - - const float3x3 Profile3 = float3x3(0.393521, 0.212376, 0.018739, - 0.365258, 0.701060, 0.111934, - 0.191677, 0.086564, 0.958385); - - const float3x3 Profile4 = float3x3(0.392258, 0.209410, 0.016061, - 0.351135, 0.725680, 0.093636, - 0.166603, 0.064910, 0.850324); - - const float3x3 Profile5 = float3x3( 0.377923, 0.195679, 0.010514, - 0.317366, 0.722319, 0.097826, - 0.207738, 0.082002, 1.076960); - - - const float3x3 ToRGB = float3x3( 3.240970, -0.969244, 0.055630, - -1.537383, 1.875968, -0.203977, - -0.498611, 
0.041555, 1.056972); - - const float3x3 ToModern = float3x3( 2.791723, -0.894766, 0.041678, - -1.173165, 1.815586, -0.130886, - -0.440973, 0.032000, 1.002034); - - const float3x3 ToDCI = float3x3( 2.493497, -0.829489, 0.035846, - -0.931384, 1.762664, -0.076172, - -0.402711, 0.023625, 0.956885); - - const float3x3 ToAdobe = float3x3( 2.041588, -0.969244, 0.013444, - -0.565007, 1.875968, -0.118360, - -0.344731, 0.041555, 1.015175); - - const float3x3 ToREC = float3x3( 1.716651, -0.666684, 0.017640, - -0.355671, 1.616481, -0.042771, - -0.253366, 0.015769, 0.942103); - - - const float3x3 D65_to_D55 = float3x3(0.4850339153, 0.2500956126, 0.0227359648, - 0.3488957224, 0.6977914447, 0.1162985741, - 0.1302823568, 0.0521129427, 0.6861537456); - - const float3x3 D65_to_D93 = float3x3(0.3412754080, 0.1759701322, 0.0159972847, - 0.3646170520, 0.7292341040, 0.1215390173, - 0.2369894093, 0.0947957637, 1.2481442225); - - float2 tc = texcoord + float2(mod(ViewportX + 1.0, 2.0) * 1.0 / BufferWidth, - mod(ViewportY + 1.0, 2.0) * 1.0 / BufferHeight); - - float4 imgcolor = COMPAT_TEXTURE(NTSC_S00, tc); - float4 afterglow = COMPAT_TEXTURE(NTSC_S01, tc); - - float w = 1.0 - afterglow.w; - float l = length(afterglow.rgb); - - afterglow.rgb = AS * w * normalize(pow(afterglow.rgb + 0.01, sat)) * l; - - float bp = w * BP / 255.0; - - if (sega_fix > 0.5) { - imgcolor.rgb = imgcolor.rgb * (255.0 / 239.0); - } - - imgcolor.rgb = min(imgcolor.rgb, 1.0); - float3 color = imgcolor.rgb; - - if (TNTC == 0) { - color.rgb = imgcolor.rgb; - } else { - float lutlow = LUTLOW / 255.0; - float invLS = 1.0 / 32.0; - - float3 lut_ref = imgcolor.rgb + - lutlow * (1.0 - pow(imgcolor.rgb, 0.333.xxx)); - - float lutb = lut_ref.b * (1.0 - 0.5 * invLS); - lut_ref.rg = lut_ref.rg * (1.0 - invLS) + 0.5 * invLS; - - float tile1 = ceil(lutb * (32.0 - 1.0)); - float tile0 = max(tile1 - 1.0, 0.0); - - float f = frac(lutb * (32.0 - 1.0)); - if (f == 0.0) { - f = 1.0; - } - - float2 coord0 = float2(tile0 + lut_ref.r, 
lut_ref.g) * - float2(invLS, 1.0); - - float2 coord1 = float2(tile1 + lut_ref.r, lut_ref.g) * - float2(invLS, 1.0); - - float4 color1, color2, res; - - if (TNTC == 1) { - color1 = COMPAT_TEXTURE(NTSC_L01, coord0); - color2 = COMPAT_TEXTURE(NTSC_L01, coord1); - res = lerp(color1, color2, f); - - } else if (TNTC == 2) { - color1 = COMPAT_TEXTURE(NTSC_L02, coord0); - color2 = COMPAT_TEXTURE(NTSC_L02, coord1); - res = lerp(color1, color2, f); - - } else if (TNTC == 3) { - color1 = COMPAT_TEXTURE(NTSC_L03, coord0); - color2 = COMPAT_TEXTURE(NTSC_L03, coord1); - res = lerp(color1, color2, f); - - } else if (TNTC == 4) { - color1 = COMPAT_TEXTURE(NTSC_L04, coord0); - color2 = COMPAT_TEXTURE(NTSC_L04, coord1); - res = lerp(color1, color2, f); - } - - res.rgb = fix_lut(res.rgb, imgcolor.rgb); - color = lerp(imgcolor.rgb, res.rgb, min(float(TNTC), 1.0)); - } - - float3 c = clamp(color, 0.0, 1.0); - float3x3 m_o; - - float p; - if (CS == 0) { p = 2.2; m_o = ToRGB; } - else if (CS == 1) { p = 2.2; m_o = ToModern; } - else if (CS == 2) { p = 2.6; m_o = ToDCI; } - else if (CS == 3) { p = 2.2; m_o = ToAdobe; } - else if (CS == 4) { p = 2.4; m_o = ToREC; } - - color = pow(c, p); - float3x3 m_i; - - if (CP == 1) m_i = Profile0; - else if (CP == 2) m_i = Profile1; - else if (CP == 3) m_i = Profile2; - else if (CP == 4) m_i = Profile3; - else if (CP == 5) m_i = Profile4; - else if (CP == 6) m_i = Profile5; - - color = mul(color, m_i); - color = mul(color, m_o); - color = clamp(color, 0.0, 1.0); - color = pow(color, 1.0 / p); - - if (CP == 0) { - color = c; - } - - float3 scolor1 = plant(pow(color, wp_saturation), - max(max(color.r, color.g), color.b)); - - float luma = dot(color, float3(0.299, 0.587, 0.114)); - - float3 scolor2 = lerp(luma, color, wp_saturation); - - color = (wp_saturation > 1.0) ? 
scolor1 : scolor2; - color = plant(color, contrast(max(max(color.r, color.g), color.b))); - p = 2.2; - color = clamp(color, 0.0, 1.0); - color = pow(color, p); - - float3 warmer = mul(color, D65_to_D55); - warmer = mul(warmer, ToRGB); - - float3 cooler = mul(color, D65_to_D93); - cooler = mul(cooler, ToRGB); - - float m = abs(WP) / 100.0; - float3 comp = (WP < 0.0) ? cooler : warmer; - - color = lerp(color, comp, m); - color = pow(max(color, 0.0), 1.0 / p); - - if (BP > -0.5) { - color = color + afterglow.rgb + bp; - } else { - color = max(color + BP / 255.0, 0.0) / - (1.0 + BP / 255.0 * step(-BP / 255.0, - max(max(color.r, color.g), color.b))) + - afterglow.rgb; - } - - color = min(color * pre_bb, 1.0); - - return float4(color, vignette(tc)); -} - -float4 Signal_1_PS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float pix_res = min(ntsc_scale, 1.0); - - float phase = (ntsc_phase == 0) ? ((NativeWidth > 300.0) ? 2.0 : 3.0) - : ((ntsc_phase > 1) ? 3.0 : 2.0); - if (ntsc_phase == 3) { - phase = 3.0; - } - - float res = ntsc_scale; - float mod1 = 2.0; - float mod2 = 3.0; - - float CHROMA_MOD_FREQ = (phase < 2.5) ? 
(4.0 * pii / 15.0) : (pii / 3.0); - float ARTIFACT = cust_artifacting; - float FRINGING = cust_fringing; - float BRIGHTNESS = ntsc_brt; - float SATURATION = ntsc_sat; - float MERGE = 0.0; - float mix1 = 0.0; - - if (ntsc_fields == 0 && phase == 3.0) MERGE = 1.0; - else if (ntsc_fields == 1) MERGE = 0.0; - else if (ntsc_fields == 2) MERGE = 1.0; - - float2 pix_no = texcoord * OriginalSize.xy * pix_res * float2(4.0, 1.0); - float3 col0 = tex2D(NTSC_S02, texcoord).rgb; - - float3 yiq1 = rgb2yiq(col0); - float c0 = yiq1.x; - yiq1.x = pow(yiq1.x, ntsc_gamma); - float lum = yiq1.x; - float2 dx = float2(OriginalSize.z, 0.0); - - float3 c1 = tex2D(NTSC_S02, texcoord - dx).rgb; - float3 c2 = tex2D(NTSC_S02, texcoord + dx).rgb; - - if (abs(ntsc_rainbow) > 0.025) { - float2 dy = float2(0.0, OriginalSize.w); - - float3 c3 = tex2D(NTSC_S02, texcoord + dy).rgb; - float3 c4 = tex2D(NTSC_S02, texcoord + dx + dy).rgb; - float3 c5 = tex2D(NTSC_S02, texcoord + dx + dx).rgb; - float3 c6 = tex2D(NTSC_S02, texcoord + dx * 3.0).rgb; - - c1.x = get_luma(c1); - c2.x = get_luma(c2); - c3.x = get_luma(c3); - c4.x = get_luma(c4); - c5.x = get_luma(c5); - c6.x = get_luma(c6); - - float mix2 = min(5.0 * min(min(abs(c0 - c1.x), abs(c0 - c2.x)), - min(abs(c2.x - c5.x), abs(c5.x - c6.x))), - 1.0); - - float bar1 = 1.0 - min(7.0 * min(max(max(c0, c3.x) - 0.15, 0.0), - max(max(c2.x, c4.x) - 0.15, 0.0)), - 1.0); - - float bar2 = step(abs(c1.x - c2.x) + abs(c0 - c5.x) + abs(c2.x - c6.x), - 0.325); - - mix1 = bar1 * bar2 * mix2 * (1.0 - min(10.0 * min(abs(c0 - c3.x), abs(c2.x - c4.x)), 1.0)); - mix1 = mix1 * ntsc_rainbow; - } - - if (ntsc_phase == 3) { - float mix3 = min(5.0 * abs(c1.x - c2.x), 1.0); - - c1.x = pow(c1.x, ntsc_gamma); - c2.x = pow(c2.x, ntsc_gamma); - - yiq1.x = lerp(min(0.5 * (yiq1.x + max(c1.x, c2.x)), - max(yiq1.x, min(c1.x, c2.x))), - yiq1.x, - mix3); - } - - float3 yiq2 = yiq1; - float3 yiqs = yiq1; - float3 yiqz = yiq1; - - float taps_comp = 1.0 + 2.0 * step(ntsc_taps, 15.5); - - 
if (MERGE > 0.5) { - float chroma_phase2 = (phase < 2.5) ? pii * (mod(pix_no.y, mod1) + mod(FrameCount + 1, 2.)) - : 0.6667 * pii * (mod(pix_no.y, mod2) + mod(FrameCount + 1, 2.)); - - float mod_phase2 = chroma_phase2 * (1.0 - mix1) + pix_no.x * CHROMA_MOD_FREQ * taps_comp; - - float i_mod2 = cos(mod_phase2); - float q_mod2 = sin(mod_phase2); - yiq2.yz *= float2(i_mod2, q_mod2); - yiq2 = mul(mix_m, yiq2); - yiq2.yz *= float2(i_mod2, q_mod2); - - if (res > 1.025) { - mod_phase2 = chroma_phase2 * (1.0 - mix1) + - res * pix_no.x * CHROMA_MOD_FREQ * taps_comp; - - i_mod2 = cos(mod_phase2); - q_mod2 = sin(mod_phase2); - yiqs.yz *= float2(i_mod2, q_mod2); - yiq2.x = dot(yiqs, mix_m[0]); - } - } - - float chroma_phase1 = (phase < 2.5) ? pii * (mod(pix_no.y, mod1) + mod(FrameCount, 2.)) - : 0.6667 * pii * (mod(pix_no.y, mod2) + mod(FrameCount, 2.)); - - float mod_phase1 = chroma_phase1 * (1.0 - mix1) + pix_no.x * CHROMA_MOD_FREQ * taps_comp; - - float i_mod1 = cos(mod_phase1); - float q_mod1 = sin(mod_phase1); - - yiq1.yz *= float2(i_mod1, q_mod1); - yiq1 = mul(mix_m, yiq1); - yiq1.yz *= float2(i_mod1, q_mod1); - - if (res > 1.025) { - mod_phase1 = chroma_phase1 * (1.0 - mix1) + res * pix_no.x * CHROMA_MOD_FREQ * taps_comp; - i_mod1 = cos(mod_phase1); - q_mod1 = sin(mod_phase1); - yiqz.yz *= float2(i_mod1, q_mod1); - yiq1.x = dot(yiqz, mix_m[0]); - } - - if (ntsc_phase == 3) { - yiq1.x = lum; - yiq2.x = lum; - } - - yiq1 = (MERGE < 0.5) ? 
yiq1 : 0.5 * (yiq1 + yiq2); - - return float4(yiq1, lum); -} - -float4 Signal_2_PS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float chroma_filter_2_phase[33] = { - 0.001384762, 0.001678312, 0.002021715, 0.002420562, 0.002880460, - 0.003406879, 0.004004985, 0.004679445, 0.005434218, 0.006272332, - 0.007195654, 0.008204665, 0.009298238, 0.010473450, 0.011725413, - 0.013047155, 0.014429548, 0.015861306, 0.017329037, 0.018817382, - 0.020309220, 0.021785952, 0.023227857, 0.024614500, 0.025925203, - 0.027139546, 0.028237893, 0.029201910, 0.030015081, 0.030663170, - 0.031134640, 0.031420995, 0.031517031}; - - float chroma_filter_3_phase[25] = { - -0.000118847, -0.000271306, -0.000502642, -0.000930833, - -0.001451013, -0.002064744, -0.002700432, -0.003241276, - -0.003524948, -0.003350284, -0.002491729, -0.000721149, - 0.002164659, 0.006313635, 0.011789103, 0.018545660, - 0.026414396, 0.035100710, 0.044196567, 0.053207202, - 0.061590275, 0.068803602, 0.074356193, 0.077856564, - 0.079052396}; - - float luma_filter_2_phase[33] = { - -0.000174844, -0.000205844, -0.000149453, -0.000051693, - 0.000000000, -0.000066171, -0.000245058, -0.000432928, - -0.000472644, -0.000252236, 0.000198929, 0.000687058, - 0.000944112, 0.000803467, 0.000363199, 0.000013422, - 0.000253402, 0.001339461, 0.002932972, 0.003983485, - 0.003026683, -0.001102056, -0.008373026, -0.016897700, - -0.022914480, -0.021642347, -0.028863273, 0.027271957, - 0.054921920, 0.098342579, 0.139044281, 0.168055832, - 0.178571429}; - - float luma_filter_3_phase[25] = { - -0.000012020, -0.000022146, -0.000013155, -0.000012020, - -0.000049979, -0.000113940, -0.000122150, -0.000005612, - 0.000170516, 0.000237199, 0.000169640, 0.000285688, - 0.000984574, 0.002018683, 0.002002275, -0.005909882, - -0.012049081, -0.018222860, -0.022606931, 0.002460860, - 0.035868225, 0.084016453, 0.135563500, 0.175261268, - 0.220176552}; - - float luma_filter_4_phase[25] = { - -0.000472644, -0.000252236, 
0.000198929, 0.000687058, - 0.000944112, 0.000803467, 0.000363199, 0.000013422, - 0.000253402, 0.001339461, 0.002932972, 0.003983485, - 0.003026683, -0.001102056, -0.008373026, -0.016897700, - -0.022914480, -0.021642347, -0.028863273, 0.027271957, - 0.054921920, 0.098342579, 0.139044281, 0.168055832, - 0.178571429}; - - const int TAPS_2_phase = 32; - const int TAPS_3_phase = 24; - - float res = ntsc_scale; - float3 signal = 0.0; - float2 one = 0.25 * OriginalSize.zz / res; - float phase = (ntsc_phase == 0) ? ((NativeWidth > 300.0) ? 2.0 : 3.0) - : ((ntsc_phase > 1) ? 3.0 : 2.0); - - if (ntsc_phase == 3) { - phase = 3.0; - luma_filter_3_phase = luma_filter_4_phase; - } - - float3 wsum = 0.0.xxx; - float3 sums = wsum; - float3 tmps = wsum; - float offset = 0.0; - int i = 0; - float j = 0.0; - - if (phase < 2.5) { - float loop = max(ntsc_taps, 8.0); - float2 dx = float2(one.x, 0.0); - float2 xd = dx; - int loopstart = int(TAPS_2_phase - loop); - - float taps = 0.0; - float laps = ntsc_taps + 1.0; - float ssub = loop - loop / ntsc_cscale1; - - for (i = loopstart; i < 32; i++) { - offset = float(i - loopstart); - j = offset + 1.0; - xd = (offset - loop) * dx; - sums = fetch_offset1(xd); - taps = max(j - ssub, 0.0); - tmps = float3(luma_filter_2_phase[i], taps.xx); - wsum = wsum + tmps; - signal += sums * tmps; - } - taps = laps - ssub; - tmps = float3(luma_filter_2_phase[TAPS_2_phase], taps.xx); - wsum = wsum + wsum + tmps; - - signal += tex2D(NTSC_S03, tex_1).xyz * tmps; - signal = signal / wsum; - - } else { - float loop = min(ntsc_taps, TAPS_3_phase); - one.y = one.y / ntsc_cscale2; - float3 dx = float3(one.x, one.y, 0.0); - float3 xd = dx; - - int loopstart = int(24.0 - loop); - - for (i = loopstart; i < 24; i++) { - offset = float(i - loopstart); - j = offset + 1.0; - xd.xy = (offset - loop) * dx.xy; - sums = fetch_offset2(xd); - tmps = float3(luma_filter_3_phase[i], - chroma_filter_3_phase[i].xx); - - wsum = wsum + tmps; - signal += sums * tmps; - } - tmps = 
float3(luma_filter_3_phase[TAPS_3_phase], - chroma_filter_3_phase[TAPS_3_phase], - chroma_filter_3_phase[TAPS_3_phase]); - - wsum = wsum + wsum + tmps; - signal += tex2D(NTSC_S03, tex_1).xyz * tmps; - signal = signal / wsum; - } - - if (ntsc_ring > 0.05) { - float2 dx = float2(OriginalSize.z / min(res, 1.0), 0.0); - - float a = tex2D(NTSC_S03, tex_1 - 1.5 * dx).a; - float b = tex2D(NTSC_S03, tex_1 - 0.5 * dx).a; - float c = tex2D(NTSC_S03, tex_1 + 1.5 * dx).a; - float d = tex2D(NTSC_S03, tex_1 + 0.5 * dx).a; - float e = tex2D(NTSC_S03, tex_1).a; - - signal.x = lerp(signal.x, - clamp(signal.x, - min(min(min(a, b), min(c, d)), e), - max(max(max(a, b), max(c, d)), e)), - ntsc_ring); - } - - float3 x = rgb2yiq(tex2D(NTSC_S02, tex_1).rgb); - signal.x = clamp(signal.x, -1.0, 1.0); - float3 rgb = signal; - - return float4(rgb, x.x); -} - -float4 Signal_3_PS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float2 dx = float2(0.25 * OriginalSize.z, 0.0) / 4.0; - float2 tcoord = tex_2 + dx; - float2 offset = float2(0.5 * OriginalSize.z, 0.0); - - float3 ll1 = tex2D(NTSC_S04, tcoord + offset).xyz; - float3 ll2 = tex2D(NTSC_S04, tcoord - offset).xyz; - float3 ll3 = tex2D(NTSC_S04, tcoord + 0.50 * offset).xyz; - float3 ll4 = tex2D(NTSC_S04, tcoord - 0.50 * offset).xyz; - float3 ref = tex2D(NTSC_S04, tcoord).xyz; - - float lum1 = min(tex2D(NTSC_S04, tex_2 - dx).a, - tex2D(NTSC_S04, tex_2 + dx).a); - - float lum2 = max(ref.x, 0.0); - - float dif = max(max(abs(ll1.x - ll2.x), abs(ll1.y - ll2.y)), - max(abs(ll1.z - ll2.z), abs(ll1.x * ll1.x - ll2.x * ll2.x))); - - float dff = max(max(abs(ll3.x - ll4.x), abs(ll3.y - ll4.y)), - max(abs(ll3.z - ll4.z), abs(ll3.x * ll3.x - ll4.x * ll4.x))); - - float lc = (1.0 - smoothstep(0.10, 0.20, abs(lum2 - lum1))) * pow(dff, 0.125); - float sweight = smoothstep(0.05 - 0.03 * lc, 0.45 - 0.40 * lc, dif); - - float3 signal = ref; - - if (abs(ntsc_shrp) > -0.1) { - float lummix = lerp(lum2, lum1, 0.1 * abs(ntsc_shrp)); - 
float lm1 = lerp(lum2 * lum2, lum1 * lum1, 0.1 * abs(ntsc_shrp)); - lm1 = sqrt(lm1); - - float lm2 = lerp(sqrt(lum2), sqrt(lum1), 0.1 * abs(ntsc_shrp)); - lm2 = lm2 * lm2; - - float k1 = abs(lummix - lm1) + 0.00001; - float k2 = abs(lummix - lm2) + 0.00001; - lummix = min((k2 * lm1 + k1 * lm2) / (k1 + k2), 1.0); - - signal.x = lerp(lum2, lummix, smoothstep(0.25, 0.4, pow(dff, 0.125))); - signal.x = min(signal.x, max(ntsc_shpe * signal.x, lum2)); - } else { - signal.x = clamp(signal.x, 0.0, 1.0); - } - - float3 rgb = signal; - if (ntsc_shrp < -0.1) { - rgb.x = lerp(ref.x, rgb.x, sweight); - } - - rgb.x = pow(rgb.x, 1.0 / ntsc_gamma); - rgb = clamp(yiq2rgb(rgb), 0.0, 1.0); - - return float4(rgb, 1.0); -} - -float4 SharpnessPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float2 g01 = float2(-0.5 * OriginalSize.z, 0.0); - float2 g21 = float2(0.5 * OriginalSize.z, 0.0); - float3 c01 = tex2D(NTSC_S05, texcoord + g01).rgb; - float3 c21 = tex2D(NTSC_S05, texcoord + g21).rgb; - float3 c11 = tex2D(NTSC_S05, texcoord).rgb; - float3 b11 = 0.5 * (c01 + c21); - - float contrast = max(max(c11.r, c11.g), c11.b); - contrast = lerp(2.0 * CCONTR, CCONTR, contrast); - - float3 mn = min(min(c01, c21), c11); - float3 mn1 = min(mn, c11 * (1.0 - contrast)); - float3 mx = max(max(c01, c21), c11); - float3 mx1 = max(mx, c11 * (1.0 + contrast)); - float3 dif = pow(mx1 - mn1 + 0.0001, 0.75); - - float3 sharpen = lerp(CSHARPEN * CDETAILS, CSHARPEN, dif); - float3 res = clamp(lerp(c11, b11, -sharpen), mn1, mx1); - - if (DEBLUR > 1.125) { - c01 = tex2D(NTSC_S02, texcoord + 2.0 * g01).rgb; - c21 = tex2D(NTSC_S02, texcoord + 2.0 * g21).rgb; - c11 = tex2D(NTSC_S02, texcoord).rgb; - - mn1 = sqrt(min(min(c01, c21), c11) * mn); - mx1 = sqrt(max(max(c01, c21), c11) * mx); - - float3 dif1 = max(res - mn1, 0.0) + 0.00001; - dif1 = pow(dif1, DEBLUR.xxx); - - float3 dif2 = max(mx1 - res, 0.0) + 0.00001; - dif2 = pow(dif2, DEBLUR.xxx); - - float3 ratio = dif1 / (dif1 + dif2); 
- - sharpen = min(lerp(mn1, mx1, ratio), - pow(res, lerp(0.75.xxx, 1.10.xxx, res))); - - res = rgb2yiq(res); - res.x = dot(sharpen, float3(0.2989, 0.5870, 0.1140)); - res = max(yiq2rgb(res), 0.0); - } - return float4(res, 1.0); -} - -float4 LuminancePS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float m = max(log2(NTSC_02.x), log2(NTSC_02.y)); - - m = floor(max(m, 1.0)) - 1.0; - - float2 dx = float2(1.0 / NTSC_02.x, 0.0); - float2 dy = float2(0.0, 1.0 / NTSC_02.y); - float2 x2 = 2.0 * dx; - float2 y2 = 2.0 * dy; - - float ltotal = 0.0; - - ltotal += length(tex2Dlod(NTSC_S06, float4(float2(0.3, 0.3), m, 0)).rgb); - ltotal += length(tex2Dlod(NTSC_S06, float4(float2(0.3, 0.7), m, 0)).rgb); - ltotal += length(tex2Dlod(NTSC_S06, float4(float2(0.7, 0.3), m, 0)).rgb); - ltotal += length(tex2Dlod(NTSC_S06, float4(float2(0.7, 0.7), m, 0)).rgb); - - ltotal *= 0.25; - ltotal = pow(0.577350269 * ltotal, 0.7); - - float lhistory = tex2D(NTSC_S07, 0.5).a; - - ltotal = lerp(ltotal, lhistory, lsmooth); - - float3 l1 = COMPAT_TEXTURE(NTSC_S06, fuxcoord.xy).rgb; - float3 r1 = COMPAT_TEXTURE(NTSC_S06, fuxcoord.xy + dx).rgb; - float3 l2 = COMPAT_TEXTURE(NTSC_S06, fuxcoord.xy - dx).rgb; - float3 r2 = COMPAT_TEXTURE(NTSC_S06, fuxcoord.xy + x2).rgb; - - float c1 = dist(l2, l1); - float c2 = dist(l1, r1); - float c3 = dist(r2, r1); - - return float4(c1, c2, c3, ltotal); -} - -float4 LinearizePS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float3 c1 = COMPAT_TEXTURE(NTSC_S06, fuxcoord).rgb; - float3 c2 = COMPAT_TEXTURE(NTSC_S06, fuxcoord + float2(0.0, OriginalSize.w)).rgb; - - if ((downsample_levelx + downsample_levely) > 0.025) { - c1 = fetch_pixel(fuxcoord); - c2 = fetch_pixel(fuxcoord + float2(0.0, OriginalSize.w)); - } - - float3 c = c1; - float intera = 1.0; - float gamma_in = clamp(gamma_i, 1.0, 5.0); - - float m1 = max(max(c1.r, c1.g), c1.b); - float m2 = max(max(c2.r, c2.g), c2.b); - float3 df = abs(c1 - c2); - float d 
= max(max(df.r, df.g), df.b); - - if (interm == 2) { - d = lerp(0.1 * d, 10.0 * d, step(m1 / (m2 + 0.0001), m2 / (m1 + 0.0001))); - } - - float r = m1; - - float yres_div = 1.0; - if (intres > 1.25) { - yres_div = intres; - } - - bool hscans = (hiscan > 0.5); - - if (interr <= NativeHeight / yres_div && interm > 0 && intres != 1.0 && - intres != 0.5 || hscans) { - intera = 0.25; - - float liine_no = clamp(floor(mod(OriginalSize.y * fuxcoord.y, 2.0)), 0.0, 1.0); - float frame_no = clamp(floor(mod(FrameCount, 2.0)), 0.0, 1.0); - - float ii = abs(liine_no - frame_no); - - if (interm < 4) { - c2 = plant(lerp(c2, c2 * c2, iscans), max(max(c2.r, c2.g), c2.b)); - r = clamp(max(m1 * ii, (1.0 - iscanb) * min(m1, m2)), 0.0, 1.0); - c = plant(lerp(lerp(c1, - c2, - min(lerp(m1, 1.0 - m2, min(m1, 1.0 - m1)) / (d + 0.00001), - 1.0)), - c1, - ii), - r); - - if (interm == 3) { - c = (1.0 - 0.5 * iscanb) * lerp(c2, c1, ii); - } - } - if (interm == 4) { - c = plant(lerp(c, c * c, 0.5 * iscans), - max(max(c.r, c.g), c.b)) * (1.0 - 0.5 * iscanb); - } - if (interm == 5) { - c = lerp(c2, c1, 0.5); - c = plant(lerp(c, c * c, 0.5 * iscans), - max(max(c.r, c.g), c.b)) * (1.0 - 0.5 * iscanb); - } - if (hscans) { - c = c1; - } - } - - c = pow(c, gamma_in); - - if (fuxcoord.x > 0.5) { - gamma_in = intera; - } else { - gamma_in = 1.0 / gamma_in; - } - - return float4(c, gamma_in); -} - -float4 HGaussianPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float4 GaussSize = OriginalSize * - lerp(1.0.xxxx, - float4(FINE_GAUSS, FINE_GAUSS, 1.0 / FINE_GAUSS, 1.0 / FINE_GAUSS), - min(FINE_GAUSS - 1.0, 1.0)); - - float f = frac(GaussSize.x * texcoord.x); - f = 0.5 - f; - - float2 tex = floor(GaussSize.xy * texcoord) * GaussSize.zw + 0.5 * GaussSize.zw; - float3 color = 0.0; - float2 dx = float2(GaussSize.z, 0.0); - - float3 pixel; - float w; - float wsum = 0.0; - float n = -SIZEH; - - do { - pixel = COMPAT_TEXTURE(NTSC_S08, tex + n * dx).rgb; - if (m_glow > 0) { - pixel = 
max(pixel - m_glow_cutoff, 0.0); - pixel = plant(pixel, - max(max(max(pixel.r, pixel.g), pixel.b) - m_glow_cutoff, 0.0)); - } - - w = gauss_h(n + f); - color = color + w * pixel; - wsum = wsum + w; - n = n + 1.0; - } while (n <= SIZEH); - - color = color / wsum; - - return float4(color, 1.0); -} - -float4 VGaussianPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float4 GaussSize = float4(SourceSize.x, OriginalSize.y, SourceSize.z, OriginalSize.w) * - lerp(1.0.xxxx, - float4(FINE_GAUSS, FINE_GAUSS, 1.0 / FINE_GAUSS, 1.0 / FINE_GAUSS), - min(FINE_GAUSS - 1.0, 1.0)); - - float f = frac(GaussSize.y * texcoord.y); - f = 0.5 - f; - - float2 tex = floor(GaussSize.xy * texcoord) * GaussSize.zw + 0.5 * GaussSize.zw; - float3 color = 0.0; - float2 dy = float2(0.0, GaussSize.w); - - float3 pixel; - float w; - float wsum = 0.0; - float n = -SIZEV; - - do { - pixel = COMPAT_TEXTURE(NTSC_S10, tex + n * dy).rgb; - - w = gauss_v(n + f); - color = color + w * pixel; - wsum = wsum + w; - n = n + 1.0; - } while (n <= SIZEV); - - color = color / wsum; - - return float4(color, 1.0); -} - -float4 BloomHorzPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float4 BloomSize = OriginalSize * - lerp(1.0.xxxx, - float4(FINE_BLOOM, FINE_BLOOM, 1.0 / FINE_BLOOM, 1.0 / FINE_BLOOM), - min(FINE_BLOOM - 1.0, 1.0)); - - float f = frac(BloomSize.x * texcoord.x); - f = 0.5 - f; - - float2 tex = floor(BloomSize.xy * texcoord) * BloomSize.zw + 0.5 * BloomSize.zw; - float4 color = 0.0; - float2 dx = float2(BloomSize.z, 0.0); - - float4 pixel; - float w; - float wsum = 0.0; - float n = -SIZEX; - - do { - pixel = COMPAT_TEXTURE(NTSC_S08, tex + n * dx); - - w = bloom_h(n + f); - pixel.a = max(max(pixel.r, pixel.g), pixel.b); - pixel.a *= pixel.a * pixel.a; - color = color + w * pixel; - wsum = wsum + w; - n = n + 1.0; - } while (n <= SIZEX); - - color = color / wsum; - - return float4(color.rgb, pow(color.a, 0.333333)); -} - -float4 
BloomVertPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float4 BloomSize = float4(SourceSize.x, OriginalSize.y, SourceSize.z, OriginalSize.w) * - lerp(1.0.xxxx, - float4(FINE_BLOOM, FINE_BLOOM, 1.0 / FINE_BLOOM, 1.0 / FINE_BLOOM), - min(FINE_BLOOM - 1.0, 1.0)); - - float f = frac(BloomSize.y * texcoord.y); - f = 0.5 - f; - - float2 tex = floor(BloomSize.xy * texcoord) * BloomSize.zw + 0.5 * BloomSize.zw; - float4 color = 0.0; - float2 dy = float2(0.0, BloomSize.w); - - float4 pixel; - float w; - float wsum = 0.0; - float n = -SIZEY; - - do { - pixel = COMPAT_TEXTURE(NTSC_S12, tex + n * dy); - - w = bloom_v(n + f); - pixel.a *= pixel.a * pixel.a; - color = color + w * pixel; - wsum = wsum + w; - - n = n + 1.0; - } while (n <= SIZEY); - - color = color / wsum; - - return float4(color.rgb, pow(color.a, 0.175000)); -} - -float4 NTSC_TV1_PS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float2 prescalex = float2(tex2Dsize(NTSC_S08, 0)) / OriginalSize.xy; - - float4 PALSize = OriginalSize * float4(prescalex.x, - prescalex.y, - 1.0 / prescalex.x, - 1.0 / prescalex.y); - - float f = frac(PALSize.x * fuxcoord.x); - f = 0.5 - f; - - float2 tex = floor(PALSize.xy * fuxcoord) * PALSize.zw + 0.5 * PALSize.zw; - float3 color = 0.0.xxx; - float scolor = 0.0; - float2 dx = float2(PALSize.z, 0.0); - - float3 pixel; - float w = 0.0; - float swsum = 0.0; - float wsum = 0.0; - float xs = prescalex.x * 0.5; - float hsharpness = HSHARPNESS * xs; - - float3 cmax = 0.0.xxx; - float3 cmin = 1.0.xxx; - float sharp = crthd_h(hsharpness, xs) * S_SHARPH; - float maxsharp = MAXS; - float FPR = hsharpness; - float fpx = 0.0; - float sp = 0.0; - float sw = 0.0; - float ts = 0.025; - - float3 luma = float3(0.2126, 0.7152, 0.0722); - - float LOOPSIZE = ceil(2.0 * FPR); - float CLPSIZE = round(2.0 * LOOPSIZE / 3.0); - float n = -LOOPSIZE; - - do { - pixel = COMPAT_TEXTURE(NTSC_S08, tex + n * dx).rgb; - - sp = max(max(pixel.r, pixel.g), 
pixel.b); - w = crthd_h(n + f, xs) - sharp; - fpx = abs(n + f - sign(n) * FPR) / FPR; - - if (abs(n) <= CLPSIZE) { - cmax = max(cmax, pixel); - cmin = min(cmin, pixel); - } - if (w < 0.0) { - w = clamp(w, lerp(-maxsharp, 0.0, pow(clamp(fpx, 0.0, 1.0), HSHARP)), 0.0); - } - - color = color + w * pixel; - wsum = wsum + w; - sw = max(w, 0.0) * (dot(pixel, luma) + ts); - scolor = scolor + sw * sp; - swsum = swsum + sw; - n = n + 1.0; - } while (n <= LOOPSIZE); - - color = color / wsum; - scolor = scolor / swsum; - - color = clamp(lerp(clamp(color, cmin, cmax), color, HARNG), 0.0, 1.0); - scolor = clamp(lerp(max(max(color.r, color.g), color.b), scolor, spike), 0.0, 1.0); - - return float4(color, scolor); -} - -float4 NTSC_TV2_PS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float prescalex = tex2Dsize(NTSC_S08, 0).x / (2.0 * OriginalSize.x); - - float4 PALSize = OriginalSize * float4(prescalex, 1.0, 1.0 / prescalex, 1.0); - - float gamma_in = 1.0 / COMPAT_TEXTURE(NTSC_S08, 0.25).a; - float lum = COMPAT_TEXTURE(NTSC_S07, 0.5).a; - float intera = COMPAT_TEXTURE(NTSC_S08, float2(0.75, 0.25)).a; - bool hscans = (hiscan > 0.5); - bool interb = (((intera < 0.35) || (no_scanlines > 0.025)) && !hscans); - - PALSize *= float4(2.0, 1.0, 0.5, 1.0); - - float SourceY = PALSize.y; - - float sy = 1.0; - - if (intres == 1.0) { - sy = max(floor(SourceY / 199.0), 1.0); - } - if (intres > 0.25 && intres != 1.0) { - sy = intres; - } - - PALSize *= float4(1.0, 1.0 / sy, 1.0, sy); - float2 lexcoord = fuxcoord.xy; - - if (IOS > 0.0 && !interb) { - float2 ofactor = OutputSize.xy / OriginalSize.xy; - float2 intfactor = (IOS < 2.5) ? 
floor(ofactor) : ceil(ofactor); - float2 diff = ofactor / intfactor; - float scan = diff.y; - - lexcoord = overscan(lexcoord, scan, scan); - - if (IOS == 1.0 || IOS == 3.0) { - lexcoord = float2(fuxcoord.x, lexcoord.y); - } - } - - float factor = 1.0 + (1.0 - 0.5 * OS) * blm_2 / 100.0 - lum * blm_2 / 100.0; - - lexcoord = overscan(lexcoord, factor, factor); - lexcoord = overscan(lexcoord, (OriginalSize.x - overscanx * BufferToViewportRatio.x) / OriginalSize.x, - (OriginalSize.y - overscany * BufferToViewportRatio.y) / OriginalSize.y); - - float2 pos = warp(lexcoord); - float2 coffset = 0.5; - float2 ps = PALSize.zw; - float2 OGL2Pos = pos * PALSize.xy - coffset; - float2 fp = frac(OGL2Pos); - float2 dx = float2(ps.x, 0.0); - float2 dy = float2(0.0, ps.y); - float f = fp.y; - float2 pC4 = floor(OGL2Pos) * ps + 0.5 * ps; - pC4.x = pos.x; - - if (intres == 0.5 && prescalex < 1.5) { - pC4.y = floor(pC4.y * OriginalSize.y) * OriginalSize.w + 0.5 * OriginalSize.w; - } - if (interb && no_scanlines < 0.025 || hscans) { - pC4.y = pos.y; - } else if (interb) { - pC4.y = pC4.y + smoothstep(0.40 - 0.5 * no_scanlines, 0.60 + 0.5 * no_scanlines, f) * PALSize.w; - } - - float3 color1 = COMPAT_TEXTURE(NTSC_S09, pC4).rgb; - float3 scolor1 = COMPAT_TEXTURE(NTSC_S09, pC4).aaa; - - if (!interb) { - color1 = pow(color1, scangamma / gamma_in); - } - - pC4 += dy; - if (intres == 0.5 && prescalex < 1.5) { - pC4.y = floor((pos.y + 0.33 * dy.y) * OriginalSize.y) * OriginalSize.w + - 0.5 * OriginalSize.w; - } - - float3 color2 = COMPAT_TEXTURE(NTSC_S09, pC4).rgb; - float3 scolor2 = COMPAT_TEXTURE(NTSC_S09, pC4).aaa; - - if (!interb) { - color2 = pow(color2, scangamma / gamma_in); - } - - float3 ctmp = color1; - float w3 = 1.0; - float3 color = color1; - float3 one = 1.0; - - if (hscans) { - color2 = color1; - scolor2 = scolor1; - } - - if (!interb || hscans) { - float3 luma = float3(0.2126, 0.7152, 0.0722); - float ssub = ssharp * max(abs(scolor1.x - scolor2.x), - abs(dot(color1, luma) - 
dot(color2, luma))); - - float shape1 = lerp(scanline1, scanline2 + ssub * scolor1.x * 35.0, f); - float shape2 = lerp(scanline1, scanline2 + ssub * scolor2.x * 35.0, 1.0 - f); - - float wt1 = st0(f); - float wt2 = st0(1.0 - f); - - float3 color0 = color1 * wt1 + color2 * wt2; - float3 scolor0 = scolor1 * wt1 + scolor2 * wt2; - ctmp = color0 / (wt1 + wt2); - float3 sctmp = scolor0 / (wt1 + wt2); - - float3 w1, w2; - - float3 cref1 = lerp(sctmp, scolor1, beam_size); - float creff1 = pow(max(max(cref1.r, cref1.g), cref1.b), scan_falloff); - float3 cref2 = lerp(sctmp, scolor2, beam_size); - float creff2 = pow(max(max(cref2.r, cref2.g), cref2.b), scan_falloff); - - if (tds) { - shape1 = lerp(scanline2, shape1, creff1); - shape2 = lerp(scanline2, shape2, creff2); - } - - float f1 = f; - float f2 = 1.0 - f; - float m1 = max(max(color1.r, color1.g), color1.b) + eps; - float m2 = max(max(color2.r, color2.g), color2.b) + eps; - - cref1 = color1 / m1; - cref2 = color2 / m2; - - if (gsl < 2) { - w1 = sw0(f1, creff1, shape1, cref1); - w2 = sw0(f2, creff2, shape2, cref2); - } else if (gsl == 2) { - w1 = sw1(f1, creff1, shape1, cref1); - w2 = sw1(f2, creff2, shape2, cref2); - } else { - w1 = sw2(f1, creff1, shape1, cref1); - w2 = sw2(f2, creff2, shape2, cref2); - } - - float3 w3 = w1 + w2; - float wf1 = max(max(w3.r, w3.g), w3.b); - - if (wf1 > 1.0) { - wf1 = 1.0 / wf1; - w1 *= wf1, w2 *= wf1; - } - - if (abs(clp) > 0.005) { - sy = m1; - one = (clp > 0.0) ? w1 : 1.0.xxx; - - float sat = 1.0001 - min(min(cref1.r, cref1.g), cref1.b); - - color1 = lerp(color1, - plant(pow(color1, 0.70.xxx - 0.325 * sat), sy), - pow(sat, 0.3333) * one * abs(clp)); - - sy = m2; - one = (clp > 0.0) ? 
w2 : 1.0.xxx; - sat = 1.0001 - min(min(cref2.r, cref2.g), cref2.b); - - color2 = lerp(color2, - plant(pow(color2, 0.70.xxx - 0.325 * sat), sy), - pow(sat, 0.3333) * one * abs(clp)); - } - color = (gc(color1) * w1 + gc(color2) * w2); - color = min(color, 1.0); - } - - if (interb) { - color = gc(color1); - } - - float colmx = max(max(ctmp.r, ctmp.g), ctmp.b); - if (!interb) { - color = pow(color, gamma_in / scangamma); - } - - return float4(color, colmx); -} - -float4 ChromaticPS(float4 position : SV_Position, float2 texcoord : TEXCOORD) : SV_Target -{ - float gamma_in = 1.0 / COMPAT_TEXTURE(NTSC_S08, 0.25).a; - float lum = COMPAT_TEXTURE(NTSC_S07, 0.5).a; - float intera = COMPAT_TEXTURE(NTSC_S08, float2(0.75, 0.25)).a; - - bool interb = ((intera < 0.35 || no_scanlines > 0.025) && (hiscan < 0.5)); - - float2 lexcoord = fuxcoord.xy; - - if (IOS > 0.0 && !interb) { - float2 ofactor = OutputSize.xy / OriginalSize.xy; - float2 intfactor = (IOS < 2.5) ? floor(ofactor) : ceil(ofactor); - - float2 diff = ofactor / intfactor; - float scan = diff.y; - - lexcoord = overscan(lexcoord, scan, scan); - - if (IOS == 1.0 || IOS == 3.0) { - lexcoord = float2(fuxcoord.x, lexcoord.y); - } - } - - float factor = 1.0 + (1.0 - 0.5 * OS) * blm_2 / 100.0 - lum * blm_2 / 100.0; - - lexcoord = overscan(lexcoord, factor, factor); - lexcoord = overscan(lexcoord, (OriginalSize.x - overscanx * BufferToViewportRatio.x) / OriginalSize.x, - (OriginalSize.y - overscany * BufferToViewportRatio.y) / OriginalSize.y); - - float2 pos0 = warp(fuxcoord.xy); - float2 pos1 = fuxcoord.xy; - float2 pos = warp(lexcoord); - - float3 color = COMPAT_TEXTURE(NTSC_S14, pos1).rgb; - float3 Bloom = COMPAT_TEXTURE(NTSC_S13, pos).rgb; - float3 Glow = COMPAT_TEXTURE(NTSC_S11, pos).rgb; - - if ((abs(deconrx) + abs(deconry) + abs(decongx) + abs(decongy) + - abs(deconbx) + abs(deconby)) > 0.2) { - bring_pixel(color, Bloom, Glow, pos1, pos); - } - - float cm = igc(max(max(color.r, color.g), color.b)); - float mx1 = 
COMPAT_TEXTURE(NTSC_S14, pos1).a; - float colmx = max(mx1, cm); - float w3 = min((cm + 0.0001) / (colmx + 0.0005), 1.0); - - if (interb) { - w3 = 1.00; - } - - float2 dx = float2(0.001, 0.0); - - float mx0 = COMPAT_TEXTURE(NTSC_S14, pos1 - dx).a; - float mx2 = COMPAT_TEXTURE(NTSC_S14, pos1 + dx).a; - float mxg = max(max(mx0, mx1), max(mx2, cm)); - float mx = pow(mxg, 1.40 / gamma_in); - - dx = float2(OriginalSize.z, 0.0) * 0.25; - - mx0 = COMPAT_TEXTURE(NTSC_S14, pos1 - dx).a; - mx2 = COMPAT_TEXTURE(NTSC_S14, pos1 + dx).a; - - float mb = (1.0 - min(abs(mx0 - mx2) / (0.5 + mx1), 1.0)); - - float3 orig1 = color; - float3 one = 1.0; - - float3 cmask = one; - float3 dmask = one; - float3 emask = one; - - float mwidths[15] = {0.0, 2.0, 3.0, 3.0, 6.0, 6.0, 2.4, 3.5, 2.4, - 3.25, 3.5, 4.5, 4.25, 7.5, 6.25}; - - float mwidth = mwidths[shadow_mask]; - - float mask_compensate = frac(mwidth); - - if (shadow_mask > 0) { - float2 maskcoord = fracoord.xy * 1.00001; - float2 scoord = maskcoord; - mwidth = floor(mwidth) * masksize; - float swidth = mwidth; - bool zoomed = (abs(mask_zoom) > 0.75); - float mscale = 1.0; - float2 maskcoord0 = maskcoord; - maskcoord.y = floor(maskcoord.y / masksize); - float mwidth1 = max(mwidth + mask_zoom, 2.0); - - if (mshift > 0.25) { - float stagg_lvl = 1.0; - if (frac(mshift) > 0.25) { - stagg_lvl = 2.0; - } - float next_line = float(floor(mod(maskcoord.y, 2.0 * stagg_lvl)) < - stagg_lvl); - maskcoord0.x = maskcoord0.x + next_line * 0.5 * mwidth1; - } - - maskcoord = maskcoord0 / masksize; - - if (!zoomed) { - cmask *= crt_mask(floor(maskcoord), mx, mb); - } else { - mscale = mwidth1 / mwidth; - - float mlerp = frac(maskcoord.x / mscale); - - if (zoom_mask > 0.025) { - mlerp = clamp((1.0 + zoom_mask) * mlerp - 0.5 * zoom_mask, 0.0, 1.0); - } - - float mcoord = floor(maskcoord.x / mscale); - if (shadow_mask == 13 && mask_zoom == -2.0) { - mcoord = ceil(maskcoord.x / mscale); - } - - cmask *= lerp(crt_mask(float2(mcoord, maskcoord.y), mx, mb), - 
crt_mask(float2(mcoord + 1.0, maskcoord.y), mx, mb), - mlerp); - } - - if (slotwidth > 0.5) { - swidth = slotwidth; - } - - float smask = 1.0; - float sm_offset = 0.0; - bool bsm_offset = (shadow_mask == 1 || shadow_mask == 3 || - shadow_mask == 6 || shadow_mask == 7 || - shadow_mask == 9 || shadow_mask == 12); - - if (zoomed) { - if (mask_layout < 0.5 && bsm_offset) { - sm_offset = 1.0; - } else if (bsm_offset) { - sm_offset = -1.0; - } - } - - swidth = round(swidth * mscale); - smask = slt_mask(scoord + float2(sm_offset, 0.0), mx, swidth); - smask = clamp(smask + lerp(smask_mit, 0.0, - min(w3, - pow(w3 * max(max(orig1.r, orig1.g), orig1.b), - 0.33333))), - 0.0, - 1.0); - - emask = cmask; - cmask *= smask; - dmask = cmask; - - if (abs(mask_bloom) > 0.025) { - float maxbl = max(max(max(Bloom.r, Bloom.g), Bloom.b), mxg); - maxbl = maxbl * max(lerp(1.0, 2.0 - colmx, bloom_dist), 0.0); - - if (mask_bloom > 0.025) { - cmask = max(min(cmask + maxbl * mask_bloom, 1.0), cmask); - } else { - cmask = max(lerp(cmask, - cmask * (1.0 - 0.5 * maxbl) + plant(pow(Bloom, 0.35.xxx), maxbl), - -mask_bloom), - cmask); - } - } - - color = pow(color, mask_gamma / gamma_in); - color = color * cmask; - color = min(color, 1.0); - color = pow(color, gamma_in / mask_gamma); - - cmask = min(cmask, 1.0); - dmask = min(dmask, 1.0); - } - - float dark_compensate = lerp(max(clamp(lerp(mcut, maskstr, mx), 0.0, 1.0) - - 1.0 + mask_compensate, - 0.0) + 1.0, - 1.0, - mx); - - if (shadow_mask == 0) { - dark_compensate = 1.0; - } - - float bb = lerp(brightboost1, brightboost2, mx) * dark_compensate; - color *= bb; - - float3 Ref = COMPAT_TEXTURE(NTSC_S08, pos).rgb; - float maxb = COMPAT_TEXTURE(NTSC_S13, pos).a; - - float vig = COMPAT_TEXTURE(NTSC_S02, - clamp((pos - 0.5) * BufferToViewportRatio + 0.5, - 0.0 + 0.5 * OriginalSize.zw, - 1.0 - 0.5 * OriginalSize.zw)).a; - - float3 bcmask = lerp(one, cmask, b_mask); - float3 hcmask = lerp(one, cmask, h_mask); - - float3 Bloom1 = Bloom; - - if (abs(blm_1) 
> 0.025) { - if (blm_1 < -0.01) { - Bloom1 = plant(Bloom, maxb); - } - - Bloom1 = min(Bloom1 * (orig1 + color), - max(0.5 * (colmx + orig1 - color), 0.001 * Bloom1)); - - Bloom1 = 0.5 * (Bloom1 + - lerp(Bloom1, lerp(colmx * orig1, Bloom1, 0.5), 1.0 - color)); - - Bloom1 = bcmask * Bloom1 * max(lerp(1.0, 2.0 - colmx, bloom_dist), 0.0); - - color = pow(pow(color, mask_gamma / gamma_in) + - abs(blm_1) * pow(Bloom1, mask_gamma / gamma_in), - gamma_in / mask_gamma); - } - - if (!interb) { - color = declip(min(color, 1.0), lerp(1.0, w3, 0.6)); - } - - if (halation > 0.01) { - Bloom = 0.5 * (Bloom + Bloom * Bloom); - - float mbl = max(max(Bloom.r, Bloom.g), Bloom.b); - float mxh = colmx + colmx * colmx; - - Bloom = plant(Bloom, max(1.25 * (mbl - 0.1375), 0.165 * mxh * (1.0 + w3))); - Bloom = max((2.0 * lerp(maxb * maxb, maxb, colmx) - - 0.5 * max(max(Ref.r, Ref.g), Ref.b)), - 0.25) * - Bloom; - - Bloom = min((2.5 - colmx + 0.5 * color) * - plant(0.375 + orig1, - lerp(0.5 * (1.0 + w3), (0.50 + w3) / 1.5, colmx)) * - hcmask * Bloom, - 1.0 - color); - - color = pow(pow(color, mask_gamma / gamma_in) + - halation * pow(Bloom, mask_gamma / gamma_in), - gamma_in / mask_gamma); - - } else if (halation < -0.01) { - float mbl = max(max(Bloom.r, Bloom.g), Bloom.b); - - Bloom = plant(Bloom + Ref + orig1 + Bloom * Bloom * Bloom, - min(mbl * mbl, 0.75)); - - color = color + - 2.0 * lerp(1.0, w3, 0.5 * colmx) * hcmask * Bloom * (-halation); - } - - float w = 0.25 + 0.60 * lerp(w3, 1.0, sqrt(colmx)); - - if (smoothmask) { - color = min(color, 1.0); - color = max(min(color / w3, 1.0) * w3, min(orig1 * bb, color * (1.0 - w3))); - } - - if (m_glow == 0) { - Glow = lerp(Glow, 0.25 * color, colmx); - } else { - float3 orig2 = plant(orig1 + 0.001 * Ref, 1.0); - - maxb = max(max(Glow.r, Glow.g), Glow.b); - Bloom = plant(Glow, 1.0); - Ref = abs(orig2 - Bloom); - - mx0 = max(max(orig2.r, orig2.g), orig2.b) - - min(min(orig2.r, orig2.g), orig2.b); - - mx2 = max(max(Bloom.r, Bloom.g), Bloom.b) - - 
min(min(Bloom.r, Bloom.g), Bloom.b); - - Bloom = lerp(maxb * min(Bloom, orig2), - w * lerp(lerp(Glow, - max(max(Ref.r, Ref.g), Ref.b) * Glow, - max(mx, mx0)), - lerp(color, Glow, mx2), - max(mx0, mx2) * Ref), - min(sqrt((1.10 - mx0) * (0.10 + mx2)), 1.0)); - - if (m_glow == 2) { - Glow = lerp(0.5 * Glow * Glow, Bloom, Bloom); - } - - Glow = lerp(m_glow_low * Glow, - m_glow_high * Bloom, - pow(colmx, m_glow_dist / gamma_in)); - } - - if (m_glow == 0) { - if (glow >= 0.0) { - color = color + 0.5 * Glow * glow; - } else { - color = color + abs(glow) * min(emask * emask, 1.0) * Glow; - } - } else { - float3 fmask = clamp(lerp(one, dmask, m_glow_mask), 0.0, 1.0); - color = color + abs(glow) * fmask * Glow; - } - - color = min(color, 1.0); - color = min(color, max(orig1, color) * lerp(one, dmask, mclip)); - color = pow(color, 1.0 / gamma_o); - - float rc = 0.6 * sqrt(max(max(color.r, color.g), color.b)) + 0.4; - - if (abs(addnoised) > 0.01) { - float3 noise0 = noise(float3(floor(OutputSize.xy * fuxcoord / noiseresd), - FrameCount)); - if (noisetype == 0) { - color = lerp(color, noise0, 0.25 * abs(addnoised) * rc); - } else { - color = min(color * lerp(1.0, 1.5 * noise0.x, 0.5 * abs(addnoised)), 1.0); - } - } - - colmx = max(max(orig1.r, orig1.g), orig1.b); - color = color + bmask * lerp(emask, - 0.125 * (1.0 - colmx) * color, - min(20.0 * colmx, 1.0)); - - return float4(color * vig * humbars(lerp(pos.y, pos.x, bardir)) * post_br * - corner((pos0 - 0.5) * BufferToViewportRatio + 0.5), - 1.0); -} - -technique CRT_Guest_NTSC -{ - pass Afterglow - { - VertexShader = PostProcessVS; - PixelShader = AfterglowPS; - RenderTarget = NTSC_T01; - } - - pass PreShader - { - VertexShader = PostProcessVS; - PixelShader = PreShaderPS; - RenderTarget = NTSC_T02; - } - - pass NTSCPASS1 - { - VertexShader = PostProcessVS; - PixelShader = Signal_1_PS; - RenderTarget = NTSC_T03; - } - - pass NTSCPASS2 - { - VertexShader = PostProcessVS; - PixelShader = Signal_2_PS; - RenderTarget = NTSC_T04; 
- } - - pass NTSCPASS3 - { - VertexShader = PostProcessVS; - PixelShader = Signal_3_PS; - RenderTarget = NTSC_T05; - } - - pass Sharpness - { - VertexShader = PostProcessVS; - PixelShader = SharpnessPS; - RenderTarget = NTSC_T06; - } - - pass Luminance - { - VertexShader = PostProcessVS; - PixelShader = LuminancePS; - RenderTarget = NTSC_T07; - } - - pass Linearize - { - VertexShader = PostProcessVS; - PixelShader = LinearizePS; - RenderTarget = NTSC_T08; - } - - pass CRT_Pass1 - { - VertexShader = PostProcessVS; - PixelShader = NTSC_TV1_PS; - RenderTarget = NTSC_T09; - } - - pass GaussianX - { - VertexShader = PostProcessVS; - PixelShader = HGaussianPS; - RenderTarget = NTSC_T10; - } - - pass GaussianY - { - VertexShader = PostProcessVS; - PixelShader = VGaussianPS; - RenderTarget = NTSC_T11; - } - - pass BloomHorz - { - VertexShader = PostProcessVS; - PixelShader = BloomHorzPS; - RenderTarget = NTSC_T12; - } - - pass BloomVert - { - VertexShader = PostProcessVS; - PixelShader = BloomVertPS; - RenderTarget = NTSC_T13; - } - - pass CRT_Pass2 - { - VertexShader = PostProcessVS; - PixelShader = NTSC_TV2_PS; - RenderTarget = NTSC_T14; - } - - pass Chromatic - { - VertexShader = PostProcessVS; - PixelShader = ChromaticPS; - } -} diff --git a/data/resources/shaders/reshade/Shaders/XY-Pos-free.fx b/data/resources/shaders/reshade/Shaders/XY-Pos-free.fx deleted file mode 100644 index cbec69c96..000000000 --- a/data/resources/shaders/reshade/Shaders/XY-Pos-free.fx +++ /dev/null @@ -1,84 +0,0 @@ -#include "ReShade.fxh" - -// CrashGG presents - -// 'XY-Pos-free' - -// A super-simple shader refined from the super-fast crt-cyclon.fx, It only provides -// the functions of free pixel stretching and position translation on the XY axis. -// Suitable for users who only want to fine-tune the screen zoom and position and do not like the bundled CRT-like effects. -// Fixed some bugs in the original version, adjusted the step progress and the range. 
- -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or (at your option) -// any later version. - - -uniform float zoomx < - ui_type = "drag"; - ui_min = -0.3000; - ui_max = 0.3000; - ui_step = 0.0005; - ui_label = "Zoom Image X"; -> = 0.0000; - -uniform float zoomy < - ui_type = "drag"; - ui_min = -0.3000; - ui_max = 0.3000; - ui_step = 0.0005; - ui_label = "Zoom Image Y"; -> = 0.0000; - -uniform float centerx < - ui_type = "drag"; - ui_min = -9.99; - ui_max = 9.99; - ui_step = 0.01; - ui_label = "Image Center X"; -> = 0.00; - -uniform float centery < - ui_type = "drag"; - ui_min = -9.99; - ui_max = 9.99; - ui_step = 0.01; - ui_label = "Image Center Y"; -> = 0.00; - - -float2 Warp(float2 pos) -{ - pos = pos*2.0-1.0; - pos *= float2(1.0+pos.y*pos.y*0, 1.0+pos.x*pos.x*0); - pos = pos*0.5+0.5; - - return pos; -} - - -float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ -// zoom in and center screen - float2 pos = Warp((vTexCoord*float2(1.0-zoomx,1.0-zoomy)-float2(centerx,centery)/100.0)); - -// Convergence - float3 res = tex2D(ReShade::BackBuffer,pos).rgb; - -// Vignette - float x = 0.0; - - return float4(res, 1.0); -} - - - -technique CRT_CYCLON -{ - pass PS_CRT_CYCLON - { - VertexShader = PostProcessVS; - PixelShader = CRT_CYCLON_PS; - } -} diff --git a/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx b/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx deleted file mode 100644 index b0a77ce2a..000000000 --- a/data/resources/shaders/reshade/Shaders/anti-aliasing/aa-shader-4.0.fx +++ /dev/null @@ -1,104 +0,0 @@ -#include "ReShade.fxh" - -/* - Copyright (C) 2016 guest(r) - guest.r@gmail.com - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published 
by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -*/ - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -static const float3 dt = float3(1.0,1.0,1.0); - -float3 texture2d(sampler2D tex, float2 coord, float4 yx) { - - float3 s00 = tex2D(tex, coord + yx.zw).xyz; - float3 s20 = tex2D(tex, coord + yx.xw).xyz; - float3 s22 = tex2D(tex, coord + yx.xy).xyz; - float3 s02 = tex2D(tex, coord + yx.zy).xyz; - - float m1=dot(abs(s00-s22),dt)+0.001; - float m2=dot(abs(s02-s20),dt)+0.001; - - return 0.5*(m2*(s00+s22)+m1*(s02+s20))/(m1+m2); -} - - - -float4 PS_aa_shader_40(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - // Calculating texel coordinates - float2 size = 4.0 / NormalizedNativePixelSize; - float2 inv_size = 1.0 / size; - - float4 yx = float4(inv_size, -inv_size); - - float2 OGL2Pos = vTexCoord * size; - - float2 fp = frac(OGL2Pos); - float2 dx = float2(inv_size.x,0.0); - float2 dy = float2(0.0, inv_size.y); - float2 g1 = float2(inv_size.x,inv_size.y); - float2 g2 = float2(-inv_size.x,inv_size.y); - - float2 pC4 = floor(OGL2Pos) * 1.0001 * inv_size; - - // Reading the texels - float3 C1 = texture2d(sBackBuffer, pC4 - dy, yx); - float3 C0 = texture2d(sBackBuffer, pC4 - g1, yx); - float3 C2 = texture2d(sBackBuffer, pC4 - g2, yx); - float3 C3 = texture2d(sBackBuffer, pC4 - 
dx, yx); - float3 C4 = texture2d(sBackBuffer, pC4 , yx); - float3 C5 = texture2d(sBackBuffer, pC4 + dx, yx); - float3 C6 = texture2d(sBackBuffer, pC4 + g2, yx); - float3 C7 = texture2d(sBackBuffer, pC4 + dy, yx); - float3 C8 = texture2d(sBackBuffer, pC4 + g1, yx); - - float3 ul, ur, dl, dr; - float m1, m2; - - m1 = dot(abs(C0-C4),dt)+0.001; - m2 = dot(abs(C1-C3),dt)+0.001; - ul = (m2*(C0+C4)+m1*(C1+C3))/(m1+m2); - - m1 = dot(abs(C1-C5),dt)+0.001; - m2 = dot(abs(C2-C4),dt)+0.001; - ur = (m2*(C1+C5)+m1*(C2+C4))/(m1+m2); - - m1 = dot(abs(C3-C7),dt)+0.001; - m2 = dot(abs(C6-C4),dt)+0.001; - dl = (m2*(C3+C7)+m1*(C6+C4))/(m1+m2); - - m1 = dot(abs(C4-C8),dt)+0.001; - m2 = dot(abs(C5-C7),dt)+0.001; - dr = (m2*(C4+C8)+m1*(C5+C7))/(m1+m2); - - float3 c11 = 0.5*((dr*fp.x+dl*(1-fp.x))*fp.y+(ur*fp.x+ul*(1-fp.x))*(1-fp.y) ); - - return float4(c11, 1.0); -} - - - -technique aa_shader_40 -{ - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_aa_shader_40; - } -} diff --git a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler-fast.fx b/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler-fast.fx deleted file mode 100644 index 0729f897f..000000000 --- a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler-fast.fx +++ /dev/null @@ -1,163 +0,0 @@ -#include "ReShade.fxh" - - -/* - G-sharp resampler 2.0 - dynamic range (upscaler, downsampler) - - Copyright (C) 2024 guest(r) - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -*/ - - - -uniform float GSHARP0 < - ui_type = "drag"; - ui_min = 0.75; - ui_max = 8.0; - ui_step = 0.05; - ui_label = "Filter Range"; -> = 2.45; - -uniform float GBOOST < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.5; - ui_step = 0.05; - ui_label = "Filter Boost (same range, speedup)"; -> = 1.75; - -uniform float GMAXSHARP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Filter Sharpness"; -> = 0.1; - -uniform float GPAR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.10; - ui_label = "Anti-Ringing"; -> = 0.50; - - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -texture2D tGSHARP2_H{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;}; -sampler2D sGSHARP2_H{Texture=tGSHARP2_H;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP) - -float smothstep(float x) -{ - return exp(-2.33*x*x); -} - -float getw(float x) -{ - float z = x/GBOOST; - float y = smothstep(z); - return max(y*y - GMAXSHARP, lerp(-GMAXSHARP, 0.0, x-1.0)); -} - -float3 gsharp2(float2 tex, float2 dx, float f, sampler2D Source) -{ - float3 color = 0.0.xxx; - - float w, fp; - float wsum = 0.0; - float3 pixel; - float3 cmax = 0.0.xxx; - float3 cmin = 1.0.xxx; - float FPR = GSHARP0; - float FPR2 = 2.0*FPR; - float FPR3 = FPR2*FPR2; - float LOOPSIZE = 
ceil(FPR2); - float x = -LOOPSIZE+1.0; - - do - { - fp = min(abs(x+f),FPR2); - pixel = tex2D(Source, tex + x*dx).rgb; - fp = fp/FPR; - w = getw(fp); - if (w > 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); } - color = color + w * pixel; - wsum = wsum + w; - - x = x + 1.0; - - } while (x <= LOOPSIZE); - - color = color / wsum; - - return lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR); -} - -float4 PS_GSHARP2_H(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize); - - float2 pos = vTexCoord * SourceSize.xy-0.5; - float f = -frac(pos.x); - float2 tex = (floor(pos) + 0.5)*SourceSize.zw; - float3 color; - float2 dx = float2(SourceSize.z, 0.0); - - color = gsharp2(tex, dx, f, sBackBuffer); - - return float4(color, 1.0); -} - -float4 PS_GSHARP2_V(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - float4 SourceSize = float4((ViewportSize.x*BufferToViewportRatio.x), 1.0/NormalizedInternalPixelSize.y, 1.0/(ViewportSize.x*BufferToViewportRatio.x), NormalizedInternalPixelSize.y); - - float2 pos = vTexCoord * SourceSize.xy-0.5; - float f = -frac(pos.y); - float2 tex = (floor(pos) + 0.5)*SourceSize.zw; - float3 color; - float2 dy = float2(0.0, SourceSize.w); - - color = gsharp2(tex, dy, f, sGSHARP2_H); - - return float4(color, 1.0); -} - - - -technique GSHARP2 -{ - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_GSHARP2_H; - RenderTarget = tGSHARP2_H; - } - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_GSHARP2_V; - } -} diff --git a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler.fx b/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler.fx deleted file mode 100644 index 128341760..000000000 --- a/data/resources/shaders/reshade/Shaders/blur/g-sharp2-resampler.fx +++ /dev/null @@ -1,145 +0,0 @@ -#include "ReShade.fxh" - - -/* - G-sharp resampler 2.0 - dynamic range (upscaler, 
downsampler) - - Copyright (C) 2024 guest(r) - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -*/ - - - -uniform float GSHARP0 < - ui_type = "drag"; - ui_min = 0.75; - ui_max = 8.0; - ui_step = 0.05; - ui_label = "Filter Range"; -> = 2.45; - -uniform float GBOOST < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.5; - ui_step = 0.05; - ui_label = "Filter Boost (same range, speedup)"; -> = 1.75; - -uniform float GMAXSHARP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Filter Sharpness"; -> = 0.1; - -uniform float GPAR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.10; - ui_label = "Anti-Ringing"; -> = 0.50; - - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP) - -float smothstep(float x) -{ - return exp(-2.33*x*x); -} - -float getw(float x) -{ - float z = x/GBOOST; - float y = smothstep(z); - return max(y*y - GMAXSHARP, lerp(-GMAXSHARP, 0.0, x-1.0)); -} - - -float4 PS_GSHARP2(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - 
float2 texCoord = vTexCoord; - float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize); - - float2 pos = vTexCoord * SourceSize.xy-0.5; - float2 f = -frac(pos); - float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw; - float3 color = 0.0.xxx; - float2 dx = float2(SourceSize.z, 0.0); - float2 dy = float2(0.0, SourceSize.w); - - float w, fp; - float wsum = 0.0; - float3 pixel; - float3 cmax = 0.0.xxx; - float3 cmin = 1.0.xxx; - float FPR = GSHARP0; - float FPR2 = 2.0*FPR; - float FPR3 = FPR2*FPR2; - float LOOPSIZE = ceil(FPR2); - float y = -LOOPSIZE+1.0; - float x = 0.0; - - do - { - x = -LOOPSIZE + 1.0; - - do - { - fp = dot(float2(x+f.x,y+f.y),float2(x+f.x,y+f.y)); - if (fp >= FPR3) w = 0.0; - else - { - pixel = tex2D(sBackBuffer, tex + x*dx + y*dy).rgb; - fp = sqrt(fp)/FPR; - w = getw(fp); - if (w >= 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); } - color = color + w * pixel; - wsum = wsum + w; - } - x = x + 1.0; - - } while (x <= LOOPSIZE); - - y = y + 1.0; - - } while (y <= LOOPSIZE); - - color = color / wsum; - - color = lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR); - - return float4(color, 1.0); -} - - - -technique GSHARP2 -{ - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_GSHARP2; - } -} diff --git a/data/resources/shaders/reshade/Shaders/crt-royale.fx b/data/resources/shaders/reshade/Shaders/crt-royale.fx deleted file mode 100644 index 198b7e920..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale.fx +++ /dev/null @@ -1,244 +0,0 @@ -#include "ReShade.fxh" - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. 
-// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - -// Enable or disable the shader -#ifndef CONTENT_BOX_VISIBLE - #define CONTENT_BOX_VISIBLE 0 -#endif - -#include "crt-royale/shaders/content-box.fxh" - -#if !CONTENT_BOX_VISIBLE - #include "crt-royale/shaders/input-blurring.fxh" - #include "crt-royale/shaders/electron-beams.fxh" - #include "crt-royale/shaders/blurring.fxh" - #include "crt-royale/shaders/deinterlace.fxh" - #include "crt-royale/shaders/phosphor-mask.fxh" - #include "crt-royale/shaders/brightpass.fxh" - #include "crt-royale/shaders/bloom.fxh" - #include "crt-royale/shaders/geometry-aa-last-pass.fxh" -#endif - - -technique CRT_Royale -{ - // Toggle the content box to help users configure it - #if CONTENT_BOX_VISIBLE - pass contentBoxPass - { - // content-box.fxh - // Draw a box that displays the crop we'll perform. 
- VertexShader = PostProcessVS; - PixelShader = contentBoxPixelShader; - } - #else - #if ENABLE_PREBLUR - pass PreblurVert - { - // input-blurring.fxh - // Optionally blur the input buffer a little - VertexShader = contentCropVS; - PixelShader = preblurVertPS; - - RenderTarget = texPreblurVert; - - PrimitiveTopology = TRIANGLESTRIP; - VertexCount = 4; - } - pass PreblurHoriz - { - // input-blurring.fxh - VertexShader = PostProcessVS; - PixelShader = preblurHorizPS; - - RenderTarget = texPreblurHoriz; - } - #endif - pass beamDistPass - { - // electron-beams.fxh - // Simulate emission of the interlaced video as electron beams. - VertexShader = calculateBeamDistsVS; - PixelShader = calculateBeamDistsPS; - - RenderTarget = texBeamDist; - - // This lets us improve performance by only computing the mask every k frames - ClearRenderTargets = false; - } - pass electronBeamPass - { - // electron-beams.fxh - // Simulate emission of the interlaced video as electron beams. - VertexShader = simulateEletronBeamsVS; - PixelShader = simulateEletronBeamsPS; - - RenderTarget = texElectronBeams; - - // If the preblur passes are disabled, we have to crop in this pass - #if !ENABLE_PREBLUR - PrimitiveTopology = TRIANGLESTRIP; - VertexCount = 4; - #endif - } - pass beamConvergencePass - { - // electron-beams.fxh - // Simulate beam convergence miscalibration - // Not to be confused with beam purity - VertexShader = beamConvergenceVS; - PixelShader = beamConvergencePS; - - RenderTarget = texBeamConvergence; - } - pass bloomApproxPassVert - { - // bloom.fxh - VertexShader = PostProcessVS; - PixelShader = approximateBloomVertPS; - - RenderTarget = texBloomApproxVert; - } - pass bloomApproxPassHoriz - { - // bloom.fxh - VertexShader = PostProcessVS; - PixelShader = approximateBloomHorizPS; - - RenderTarget = texBloomApproxHoriz; - } - pass blurVerticalPass - { - // blurring.fxh - // Vertically blur the approx bloom - VertexShader = blurVerticalVS; - PixelShader = blurVerticalPS; - - 
RenderTarget = texBlurVertical; - } - pass blurHorizontalPass - { - // blurring.fxh - // Horizontally blur the approx bloom - VertexShader = blurHorizontalVS; - PixelShader = blurHorizontalPS; - - RenderTarget = texBlurHorizontal; - } - pass deinterlacePass - { - // deinterlace.fxh - // Optionally deinterlace the video if interlacing is enabled. - // Can help approximate the original crt-royale's appearance - // without some issues like image retention. - VertexShader = deinterlaceVS; - PixelShader = deinterlacePS; - - RenderTarget = texDeinterlace; - } - pass freezeFramePass - { - // deinterlace.fxh - // Capture the current frame, so we can use it in the next - // frame's deinterlacing pass. - VertexShader = freezeFrameVS; - PixelShader = freezeFramePS; - - RenderTarget = texFreezeFrame; - - // Explicitly disable clearing render targets - // scanlineBlendPass will not work properly if this ever defaults to true - ClearRenderTargets = false; - } - pass generatePhosphorMask - { - // phosphor-mask.fxh - VertexShader = generatePhosphorMaskVS; - PixelShader = generatePhosphorMaskPS; - - RenderTarget = texPhosphorMask; - - // This lets us improve performance by only computing the mask every k frames - ClearRenderTargets = false; - - PrimitiveTopology = TRIANGLESTRIP; - VertexCount = 4; - } - pass applyPhosphormask - { - // phosphor-mask.fxh - // Tile the scaled phosphor mask and apply it to - // the deinterlaced image. 
- VertexShader = PostProcessVS; - PixelShader = applyComputedPhosphorMaskPS; - - RenderTarget = texMaskedScanlines; - // RenderTarget = texGeometry; - } - pass brightpassPass - { - // brightpass.fxh - // Apply a brightpass filter for the bloom effect - VertexShader = brightpassVS; - PixelShader = brightpassPS; - - RenderTarget = texBrightpass; - } - pass bloomVerticalPass - { - // bloom.fxh - // Blur vertically for the bloom effect - VertexShader = bloomVerticalVS; - PixelShader = bloomVerticalPS; - - RenderTarget = texBloomVertical; - } - pass bloomHorizontalPass - { - // bloom.fxh - // Blur horizontally for the bloom effect. - // Also apply various color changes and effects. - VertexShader = bloomHorizontalVS; - PixelShader = bloomHorizontalPS; - - RenderTarget = texBloomHorizontal; - } - pass geometryPass - { - // geometry-aa-last-pass.fxh - // Apply screen geometry and anti-aliasing. - VertexShader = geometryVS; - PixelShader = geometryPS; - - RenderTarget = texGeometry; - } - pass uncropPass - { - // content-box.fxh - // Uncrop the video, so we draw the game's content - // in the same position it started in. - VertexShader = contentUncropVS; - PixelShader = uncropContentPixelShader; - - PrimitiveTopology = TRIANGLESTRIP; - VertexCount = 4; - } - #endif -} \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/bind-shader-params.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/bind-shader-params.fxh deleted file mode 100644 index 7bce0fff6..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/bind-shader-params.fxh +++ /dev/null @@ -1,908 +0,0 @@ -#ifndef _BIND_SHADER_PARAMS_H -#define _BIND_SHADER_PARAMS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. 
-// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -/////////////////////////////// BEGIN INCLUDES /////////////////////////////// -#include "helper-functions-and-macros.fxh" -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" -#include "../version-number.fxh" - -//////////////////////////////// END INCLUDES //////////////////////////////// - -// Override some parameters for gamma-management.h and tex2Dantialias.h: -#ifndef _OVERRIDE_DEVICE_GAMMA - #define _OVERRIDE_DEVICE_GAMMA 1 -#endif - -#if __RENDERER__ != 0x9000 - #define _DX9_ACTIVE 0 -#else - #define _DX9_ACTIVE 1 -#endif - -// #ifndef ANTIALIAS_OVERRIDE_BASICS -// #define ANTIALIAS_OVERRIDE_BASICS 1 -// #endif - -// #ifndef ANTIALIAS_OVERRIDE_PARAMETERS -// #define ANTIALIAS_OVERRIDE_PARAMETERS 1 -// #endif - -#ifndef ADVANCED_SETTINGS - #define ADVANCED_SETTINGS 0 -#endif - -// The width of the game's content -#ifndef CONTENT_WIDTH - #define CONTENT_WIDTH BUFFER_WIDTH -#endif -// The height of the game's content -#ifndef CONTENT_HEIGHT - #define CONTENT_HEIGHT BUFFER_HEIGHT -#endif - -#if ADVANCED_SETTINGS == 1 - // Using vertex uncropping is marginally faster, but vulnerable to DX9 weirdness. 
- // Most users will likely prefer the slower algorithm. - #ifndef USE_VERTEX_UNCROPPING - #define USE_VERTEX_UNCROPPING 0 - #endif - - #ifndef NUM_BEAMDIST_COLOR_SAMPLES - #define NUM_BEAMDIST_COLOR_SAMPLES 1024 - #endif - - #ifndef NUM_BEAMDIST_DIST_SAMPLES - #define NUM_BEAMDIST_DIST_SAMPLES 120 - #endif - - #ifndef BLOOMAPPROX_DOWNSIZING_FACTOR - #define BLOOMAPPROX_DOWNSIZING_FACTOR 4.0 - #endif - - // Define this internal value, so ADVANCED_SETTINGS == 0 doesn't cause a redefinition error when - // NUM_BEAMDIST_COLOR_SAMPLES defined in the preset file. Also makes it easy to avoid bugs - // related to parentheses and order-of-operations when the user defines this arithmetically. - static const uint num_beamdist_color_samples = uint(NUM_BEAMDIST_COLOR_SAMPLES); - static const uint num_beamdist_dist_samples = uint(NUM_BEAMDIST_DIST_SAMPLES); - static const float bloomapprox_downsizing_factor = float(BLOOMAPPROX_DOWNSIZING_FACTOR); -#else - static const uint USE_VERTEX_CROPPING = 0; - static const uint num_beamdist_color_samples = 1024; - static const uint num_beamdist_dist_samples = 120; - static const float bloomapprox_downsizing_factor = 4.0; -#endif - -#ifndef HIDE_HELP_SECTIONS - #define HIDE_HELP_SECTIONS 0 -#endif - - -// Offset the center of the game's content (horizontal) -#ifndef CONTENT_CENTER_X - #define CONTENT_CENTER_X 0 -#endif -// Offset the center of the game's content (vertical) -#ifndef CONTENT_CENTER_Y - #define CONTENT_CENTER_Y 0 -#endif - -// Wrap the content size in parenthesis for internal use, so the user doesn't have to -static const float2 content_size = float2(int(CONTENT_WIDTH), int(CONTENT_HEIGHT)); - -#ifndef ENABLE_PREBLUR - #define ENABLE_PREBLUR 1 -#endif - - -static const float2 buffer_size = float2(BUFFER_WIDTH, BUFFER_HEIGHT); - - -// The normalized center is 0.5 plus the normalized offset -static const float2 content_center = float2(CONTENT_CENTER_X, CONTENT_CENTER_Y) / buffer_size + 0.5; -// The content's normalized diameter 
d is its size divided by the buffer's size. The radius is d/2. -static const float2 content_radius = content_size / (2.0 * buffer_size); -static const float2 content_scale = content_size / buffer_size; - -static const float content_left = content_center.x - content_radius.x; -static const float content_right = content_center.x + content_radius.x; -static const float content_upper = content_center.y - content_radius.y; -static const float content_lower = content_center.y + content_radius.y; - -// The xy-offset of the top-left pixel in the content box -static const float2 content_offset = float2(content_left, content_upper); -static const float2 content_offset_from_right = float2(content_right, content_lower); - -uniform uint frame_count < source = "framecount"; >; -uniform int overlay_active < source = "overlay_active"; >; - -static const float gba_gamma = 3.5; // Irrelevant but necessary to define. - - -// === HELP AND INFO === - -uniform int APPEND_VERSION_SUFFIX(version) < - ui_text = "Version: " DOT_VERSION_STR; - ui_label = " "; - ui_type = "radio"; ->; - -uniform int basic_setup_help < - ui_text = "1. Configure the Content Box if your game has letter-boxing.\n" - "2. Configure the Phosphor Mask.\n" - "3. Configure the Scanlines.\n" - "4. Configure the Colors and Effects.\n" - "5. Configure the Screen Geometry.\n" - "6. Configure or disable Preblur\n\n" - "- In Preprocessor Definitions, set ADVANCED_SETTINGS to 1 to access more settings.\n"; - ui_category = "Basic Setup Instructions"; - ui_category_closed = true; - ui_label = " "; - ui_type = "radio"; - hidden = HIDE_HELP_SECTIONS; ->; - -uniform int content_box_help < - ui_text = "1. Expand the Preprocessor Definitions section.\n" - "2. Set CONTENT_BOX_VISIBLE to 1.\n" - "3. Use the \"CONTENT_\" parameters to configure the Content Box.\n" - "4. Align the content box with the border of your game.\n" - "5. 
Set CONTENT_BOX_VISIBLE to 0 when you're done.\n\n" - "Parameters to focus on:\n" - "- CONTENT_HEIGHT and CONTENT_WIDTH\n" - "- CONTENT_CENTER_X and CONTENT_CENTER_Y\n" - "- CONTENT_BOX_INSCRIBED\n\n" - "Fancy Trick 1:\n" - "\tCONTENT_HEIGHT = BUFFER_HEIGHT\n" - "\tCONTENT_WIDTH = CONTENT_HEIGHT * 4.0 / 3.0\n" - "- Good if your game fills the screen vertically and has a 4:3 aspect ratio.\n" - "- Will also rescale automatically if you resize the window.\n\n" - "Fancy Trick 2:\n" - "\tCONTENT_HEIGHT = CONTENT_WIDTH * 9.0 / 16.0\n" - "\tCONTENT_WIDTH = 1500\n" - "- Good if your game is 1500 pixels wide with a 16:9 aspect ratio.\n" - "- Won't rescale automatically, but you'd only have to change the width.\n"; - ui_category = "Content Box Instructions"; - ui_category_closed = true; - ui_label = " "; - ui_type = "radio"; - hidden = HIDE_HELP_SECTIONS; ->; - - -// ==== PHOSPHOR MASK ==== -uniform int mask_type < - #if !HIDE_HELP_SECTIONS - ui_text = "Choose which kind of CRT you want.\n\n"; - #endif - ui_label = "Mask Type"; - ui_tooltip = "Selects the phosphor shape"; - ui_type = "combo"; - ui_items = "Grille\0" - "Slot\0" - "Shadow\0" - "LowRes Grille\0" - "LowRes Slot\0" - "LowRes Shadow\0"; - - ui_category = "Phosphor Mask"; - ui_category_closed = true; -> = mask_type_static; - -uniform uint mask_size_param < - ui_label = "Mask Size Param"; - ui_tooltip = "Switch between using Mask Triad Size or Mask Num Triads"; - ui_type = "combo"; - ui_items = "Triad Width\0" - "Num Triads Across\0"; - hidden = !ADVANCED_SETTINGS; - - ui_spacing = 2; - ui_category = "Phosphor Mask"; -> = mask_size_param_static; - -uniform float mask_triad_width < - ui_label = "Mask Triad Width"; - ui_tooltip = "The width of a triad in pixels"; - ui_type = "slider"; - ui_min = 1.0; - ui_max = 60.0; - ui_step = 0.1; - - ui_category = "Phosphor Mask"; -> = mask_triad_width_static; - -uniform float mask_num_triads_across < - ui_label = "Mask Num Triads Across"; - ui_tooltip = "The number of triads in 
the viewport (horizontally)"; - ui_type = "drag"; - ui_min = 1.0; - ui_max = 1280.0; - ui_step = 1.0; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Phosphor Mask"; -> = mask_num_triads_across_static; - -uniform float scale_triad_height< - ui_label = "Scale Triad Height"; - ui_tooltip = "Scales the height of a triad"; - ui_type = "drag"; - ui_min = 0.01; - ui_max = 10.0; - ui_step = 0.001; - - ui_spacing = 2; - ui_category = "Phosphor Mask"; -> = 1.0; - -uniform float2 phosphor_thickness < - ui_label = "Phosphor Thickness XY"; - ui_tooltip = "Makes the phosphors appear thicker in each direction"; - ui_type = "drag"; - ui_min = 0.01; - ui_max = 0.99; - ui_step = 0.01; - // hidden = !ADVANCED_SETTINGS; - - ui_category = "Phosphor Mask"; -> = 0.2; - -uniform float2 phosphor_sharpness < - ui_label = "Phosphor Sharpness XY"; - ui_tooltip = "Makes the phosphors appear more crisp in each direction"; - ui_type = "drag"; - ui_min = 1; - ui_max = 100; - ui_step = 1; - // hidden = !ADVANCED_SETTINGS; - - ui_category = "Phosphor Mask"; -> = 50; - -uniform float3 phosphor_offset_x < - ui_label = "Phosphor Offset RGB X"; - ui_tooltip = "Very slightly shifts the phosphor mask. Can help with subpixel alignment."; - ui_type = "drag"; - ui_min = -1; - ui_max = 1; - ui_step = 0.01; - // hidden = !ADVANCED_SETTINGS; - - ui_spacing = 2; - ui_category = "Phosphor Mask"; -> = 0; - -uniform float3 phosphor_offset_y < - ui_label = "Phosphor Offset RGB Y"; - ui_tooltip = "Very slightly shifts the phosphor mask. 
Can help with subpixel alignment."; - ui_type = "drag"; - ui_min = -1; - ui_max = 1; - ui_step = 0.01; - // hidden = !ADVANCED_SETTINGS; - - ui_category = "Phosphor Mask"; -> = 0; - -// static const uint pixel_grid_mode = 0; -// static const float2 pixel_size = 1; -/* -// ==== PIXELATION === -uniform uint pixel_grid_mode < - #if !HIDE_HELP_SECTIONS - ui_text = "- Fix issues displaying pixel art.\n" - "- Force high-res games to look low-res.\n\n"; - #endif - ui_label = "Pixel Grid Param"; - ui_tooltip = "Switch between using Pixel Size or Num Pixels"; - ui_type = "combo"; - ui_items = "Pixel Size\0" - "Content Resolution\0"; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Pixelation"; - ui_category_closed = true; -> = 0; - -uniform float2 pixel_size < - #if !HIDE_HELP_SECTIONS && !ADVANCED_SETTINGS - ui_text = "- Fix issues displaying pixel art.\n" - "- Force high-res games to look low-res.\n\n"; - #endif - ui_label = "Pixel Size"; - ui_tooltip = "The size of an in-game pixel on screen, in real-world pixels"; - ui_type = "slider"; - ui_min = 1.0; - ui_max = 30.0; - ui_step = 1.0; - - ui_category = "Pixelation"; - ui_category_closed = true; -> = float2(1, 1); - -uniform float2 pixel_grid_resolution < - ui_label = "Num Pixels"; - ui_tooltip = "The number of in-game pixels displayed on-screen in each direction"; - ui_type = "drag"; - ui_min = 1.0; - ui_max = 10000.0; - ui_step = 1.0; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Pixelation"; -> = content_size; -uniform float2 pixel_grid_offset < - ui_label = "Pixel Grid Offset"; - ui_tooltip = "Shifts the pixel-grid to help with alignment"; - ui_type = "slider"; - ui_min = -15.0; - ui_max = 15.0; - ui_step = 1.0; - - #if ADVANCED_SETTINGS - ui_spacing = 2; - #endif - ui_category = "Pixelation"; -> = float2(0, 0); -*/ - -// ==== SCANLINES ==== -uniform uint scanline_thickness < - #if !HIDE_HELP_SECTIONS - ui_text = "Configure the electron beams and interlacing.\n\n"; - #endif - ui_label = "Scanline Thickness"; - 
ui_tooltip = "Sets the height of each scanline"; - ui_type = "slider"; - ui_min = 1; - ui_max = 30; - ui_step = 1; - - ui_category = "Scanlines"; - ui_category_closed = true; -> = 2; - -uniform float scanline_offset < - ui_label = "Scanline Offset"; - ui_tooltip = "Vertically shifts the scanlines to help with alignment"; - ui_type = "slider"; - ui_min = -30; - ui_max = 30; - ui_step = 1; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Scanlines"; -> = 0; - -uniform uint beam_shape_mode < - ui_label = "Beam Shape Mode"; - ui_tooltip = "Select the kind of beam to use."; - ui_type = "combo"; - ui_items = "Digital (Fast)\0" - "Linear (Simple)\0" - "Gaussian (Realistic)\0" - "Multi-Source Gaussian (Expensive)\0"; - - ui_category = "Scanlines"; -> = 1; - -uniform bool enable_interlacing < - ui_label = "Enable Interlacing"; - - ui_spacing = 5; - ui_category = "Scanlines"; -> = false; - -uniform bool interlace_back_field_first < - ui_label = "Draw Back-Field First"; - ui_tooltip = "Draw odd-numbered scanlines first (often has no effect)"; - - ui_category = "Scanlines"; -> = interlace_back_field_first_static; - -uniform uint scanline_deinterlacing_mode < - ui_label = "Deinterlacing Mode"; - ui_tooltip = "Selects the deinterlacing algorithm, if any."; - ui_type = "combo"; - ui_items = "None\0" - "Fake-Progressive\0" - "Weaving\0" - "Blended Weaving\0"; - - ui_category = "Scanlines"; -> = 1; - -uniform float deinterlacing_blend_gamma < - ui_label = "Deinterlacing Blend Gamma"; - ui_tooltip = "Nudge this if deinterlacing changes your colors too much"; - ui_type = "slider"; - ui_min = 0.01; - ui_max = 5.0; - ui_step = 0.01; - - ui_category = "Scanlines"; -> = 1.0; - -uniform float linear_beam_thickness < - ui_label = "Linear Beam Thickness"; - ui_tooltip = "Linearly widens or narrows the beam"; - ui_type = "slider"; - ui_min = 0.01; - ui_max = 3.0; - ui_step = 0.01; - - ui_spacing = 5; - ui_category = "Scanlines"; -> = 1.0; - -uniform float gaussian_beam_min_sigma < - 
ui_label = "Gaussian Beam Min Sigma"; - ui_tooltip = "For Gaussian Beam Shape, sets thickness of dim pixels"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - - ui_spacing = 5; - ui_category = "Scanlines"; -> = gaussian_beam_min_sigma_static; - -uniform float gaussian_beam_max_sigma < - ui_label = "Gaussian Beam Max Sigma"; - ui_tooltip = "For Gaussian Beam Shape, sets thickness of bright pixels"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - - ui_category = "Scanlines"; -> = gaussian_beam_max_sigma_static; - -uniform float gaussian_beam_spot_power < - ui_label = "Gaussian Beam Spot Power"; - ui_tooltip = "For Gaussian Beam Shape, balances between Min and Max Sigma"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - - ui_category = "Scanlines"; -> = gaussian_beam_spot_power_static; - -uniform float gaussian_beam_min_shape < - ui_label = "Gaussian Beam Min Shape"; - ui_tooltip = "For Gaussian Beam Shape, sets sharpness of dim pixels"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_spacing = 2; - ui_category = "Scanlines"; -> = gaussian_beam_min_shape_static; - -uniform float gaussian_beam_max_shape < - ui_label = "Gaussian Beam Max Shape"; - ui_tooltip = "For Gaussian Beam Shape, sets sharpness of bright pixels"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Scanlines"; -> = gaussian_beam_max_shape_static; - -uniform float gaussian_beam_shape_power < - ui_label = "Gaussian Beam Shape Power"; - ui_tooltip = "For Gaussian Beam Shape, balances between Min and Max Shape"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Scanlines"; -> = gaussian_beam_shape_power_static; - -uniform float3 convergence_offset_x < - ui_label = "Convergence Offset X RGB"; - ui_tooltip = "Shift the color channels horizontally"; - ui_type = "drag"; - ui_min = -10; - ui_max = 10; - ui_step = 0.05; - hidden = 
!ADVANCED_SETTINGS; - - ui_spacing = 5; - ui_category = "Scanlines"; -> = 0; -uniform float3 convergence_offset_y < - ui_label = "Convergence Offset Y RGB"; - ui_tooltip = "Shift the color channels vertically"; - ui_type = "drag"; - ui_min = -10; - ui_max = 10; - ui_step = 0.05; - hidden = !ADVANCED_SETTINGS; - ui_category = "Scanlines"; -> = 0; - -static uint beam_horiz_filter = beam_horiz_filter_static; -static float beam_horiz_sigma = beam_horiz_sigma_static; -static float beam_horiz_linear_rgb_weight = beam_horiz_linear_rgb_weight_static; - -// ==== IMAGE COLORIZATION ==== -uniform float crt_gamma < - #if !HIDE_HELP_SECTIONS - ui_text = "Apply gamma, contrast, and blurring.\n\n"; - #endif - ui_label = "CRT Gamma"; - ui_tooltip = "The gamma-level of the original content"; - ui_type = "slider"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.01; - - ui_category = "Colors and Effects"; - ui_category_closed = true; -> = crt_gamma_static; - -uniform float lcd_gamma < - ui_label = "LCD Gamma"; - ui_tooltip = "The gamma-level of your display"; - ui_type = "slider"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.01; - - ui_category = "Colors and Effects"; -> = lcd_gamma_static; - -uniform float levels_contrast < - ui_label = "Levels Contrast"; - ui_tooltip = "Sets the contrast of the CRT"; - ui_type = "slider"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.01; - - ui_spacing = 5; - ui_category = "Colors and Effects"; -> = levels_contrast_static; - -uniform float halation_weight < - ui_label = "Halation"; - ui_tooltip = "Desaturation due to eletrons exciting the wrong phosphors"; - ui_type = "slider"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - - ui_spacing = 2; - ui_category = "Colors and Effects"; -> = halation_weight_static; - -uniform float diffusion_weight < - ui_label = "Diffusion"; - ui_tooltip = "Blurring due to refraction from the screen's glass"; - ui_type = "slider"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - - ui_category = "Colors and 
Effects"; -> = diffusion_weight_static; - -uniform float blur_radius < - ui_label = "Blur Radius"; - ui_tooltip = "Scales the radius of the halation and diffusion effects"; - ui_type = "slider"; - ui_min = 0.01; - ui_max = 5.0; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Colors and Effects"; -> = 1.0; - -uniform float bloom_underestimate_levels < - ui_label = "Bloom Underestimation"; - ui_tooltip = "Scale the bloom effect's intensity"; - ui_type = "drag"; - ui_min = FIX_ZERO(0.0); - ui_step = 0.01; - - ui_spacing = 2; - ui_category = "Colors and Effects"; -> = bloom_underestimate_levels_static; - -uniform float bloom_excess < - ui_label = "Bloom Excess"; - ui_tooltip = "Extra bloom applied to all colors"; - ui_type = "slider"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - - ui_category = "Colors and Effects"; -> = bloom_excess_static; - -uniform float2 aa_subpixel_r_offset_runtime < - ui_label = "AA Subpixel R Offet XY"; - ui_type = "drag"; - ui_min = -0.5; - ui_max = 0.5; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS || !_RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS; - - ui_category = "Colors and Effects"; -> = aa_subpixel_r_offset_static; - -static const float aa_cubic_c = aa_cubic_c_static; -static const float aa_gauss_sigma = aa_gauss_sigma_static; - - -// ==== GEOMETRY ==== -uniform uint geom_rotation_mode < - #if !HIDE_HELP_SECTIONS - ui_text = "Change the geometry of the screen's glass.\n\n"; - #endif - ui_label = "Rotate Screen"; - ui_type = "combo"; - ui_items = "0 degrees\0" - "90 degrees\0" - "180 degrees\0" - "270 degrees\0"; - - ui_category = "Screen Geometry"; - ui_category_closed = true; -> = 0; -uniform uint geom_mode_runtime < - ui_label = "Geometry Mode"; - ui_tooltip = "Select screen curvature type"; - ui_type = "combo"; - ui_items = "Flat\0" - "Spherical\0" - "Spherical (Alt)\0" - "Cylindrical (Trinitron)\0"; - - ui_category = "Screen Geometry"; -> = geom_mode_static; - -uniform float geom_radius < - ui_label = "Geometry 
Radius"; - ui_tooltip = "Select screen curvature radius"; - ui_type = "slider"; - ui_min = 1.0 / (2.0 * pi); - ui_max = 1024; - ui_step = 0.01; - - ui_category = "Screen Geometry"; -> = geom_radius_static; - -uniform float geom_view_dist < - ui_label = "View Distance"; - ui_type = "slider"; - ui_min = 0.5; - ui_max = 1024; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_spacing = 2; - ui_category = "Screen Geometry"; -> = geom_view_dist_static; - -uniform float2 geom_tilt_angle < - ui_label = "Screen Tilt Angles"; - ui_type = "drag"; - ui_min = -pi; - ui_max = pi; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Screen Geometry"; -> = geom_tilt_angle_static; - -uniform float2 geom_aspect_ratio < - ui_label = "Screen Aspect Ratios"; - ui_type = "drag"; - ui_min = 1.0; - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_category = "Screen Geometry"; -> = float2(geom_aspect_ratio_static, 1); -uniform float2 geom_overscan < - ui_label = "Geom Overscan"; - ui_type = "drag"; - ui_min = FIX_ZERO(0.0); - ui_step = 0.01; - hidden = !ADVANCED_SETTINGS; - - ui_spacing = 2; - ui_category = "Screen Geometry"; -> = geom_overscan_static; - -// ==== BORDER ==== -uniform float border_size < - #if !HIDE_HELP_SECTIONS - ui_text = "Apply a thin vignette to the edge of the screen.\n\n"; - #endif - ui_label = "Border Size"; - ui_category_closed = true; - ui_type = "slider"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.01; - - ui_category = "Screen Border"; -> = border_size_static; - -uniform float border_darkness < - ui_label = "Border Darkness"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - - ui_category = "Screen Border"; -> = border_darkness_static; - -uniform float border_compress < - ui_label = "Border Compress"; - ui_type = "drag"; - ui_min = 0.0; - ui_step = 0.01; - - ui_category = "Screen Border"; -> = border_compress_static; - -// ==== PREBLUR ==== -#if ENABLE_PREBLUR - uniform float2 preblur_effect_radius < - #if !HIDE_HELP_SECTIONS - 
ui_text = "- Apply a linear blur to the input image. Kind of like an NTSC/Composite shader, but much faster.\n" - "- If you want to use an NTSC shader or don't like this effect, disable it by setting ENABLE_PREBLUR to 0\n" - "- If you leave all of these set to 0, then they don't do anything. Consider disabling the effect to improve performance.\n\n"; - #endif - ui_type = "drag"; - ui_min = 0; - ui_max = 100; - ui_step = 1; - ui_label = "Effect Radius XY"; - ui_tooltip = "The radius of the effect visible on the screen (measured in pixels)"; - - ui_category = "Pre-Blur"; - ui_category_closed = true; - > = 0; - uniform uint2 preblur_sampling_radius < - ui_type = "drag"; - ui_min = 0; - ui_max = 100; - ui_step = 1; - ui_label = "Sampling Radius XY"; - ui_tooltip = "The number of samples to take on either side of each pixel"; - - ui_category = "Pre-Blur"; - > = 0; -#else - static const float2 preblur_effect_radius = 0; - static const uint2 preblur_sampling_radius = 0; -#endif - -// Provide accessors for vector constants that pack scalar uniforms: -float2 get_aspect_vector(const float geom_aspect_ratio) -{ - // Get an aspect ratio vector. 
Enforce geom_max_aspect_ratio, and prevent - // the absolute scale from affecting the uv-mapping for curvature: - const float geom_clamped_aspect_ratio = - min(geom_aspect_ratio, geom_max_aspect_ratio); - const float2 geom_aspect = - normalize(float2(geom_clamped_aspect_ratio, 1.0)); - return geom_aspect; -} - -float2 get_geom_overscan_vector() -{ - return geom_overscan; -} - -float2 get_geom_tilt_angle_vector() -{ - return geom_tilt_angle; -} - -float3 get_convergence_offsets_x_vector() -{ - return convergence_offset_x; -} - -float3 get_convergence_offsets_y_vector() -{ - return convergence_offset_y; -} - -float2 get_convergence_offsets_r_vector() -{ - return float2(convergence_offset_x.r, convergence_offset_y.r); -} - -float2 get_convergence_offsets_g_vector() -{ - return float2(convergence_offset_x.g, convergence_offset_y.g); -} - -float2 get_convergence_offsets_b_vector() -{ - return float2(convergence_offset_x.b, convergence_offset_y.b); -} - -float2 get_aa_subpixel_r_offset() -{ - #if _RUNTIME_ANTIALIAS_WEIGHTS - #if _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - // WARNING: THIS IS EXTREMELY EXPENSIVE. 
- return aa_subpixel_r_offset_runtime; - #else - return aa_subpixel_r_offset_static; - #endif - #else - return aa_subpixel_r_offset_static; - #endif -} - -// Provide accessors settings which still need "cooking:" -float get_mask_amplify() -{ - static const float mask_grille_amplify = 1.0/mask_grille_avg_color; - static const float mask_slot_amplify = 1.0/mask_slot_avg_color; - static const float mask_shadow_amplify = 1.0/mask_shadow_avg_color; - - float mask_amplify; - [flatten] - switch (mask_type) { - case 0: - mask_amplify = mask_grille_amplify; - break; - case 1: - mask_amplify = mask_slot_amplify; - break; - case 2: - mask_amplify = mask_shadow_amplify; - break; - case 3: - mask_amplify = mask_grille_amplify; - break; - case 4: - mask_amplify = mask_slot_amplify; - break; - default: - mask_amplify = mask_shadow_amplify; - break; - - } - - return mask_amplify; -} - -#endif // _BIND_SHADER_PARAMS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/bloom-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/bloom-functions.fxh deleted file mode 100644 index 2c6456ace..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/bloom-functions.fxh +++ /dev/null @@ -1,320 +0,0 @@ -#ifndef _BLOOM_FUNCTIONS_H -#define _BLOOM_FUNCTIONS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// These utility functions and constants help several passes determine the -// size and center texel weight of the phosphor bloom in a uniform manner. - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -// We need to calculate the correct blur sigma using some .cgp constants: -//#include "../user-settings.h" - - -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" -#include "bind-shader-params.fxh" -#include "blur-functions.fxh" - -/////////////////////////////// BLOOM CONSTANTS ////////////////////////////// - -// Compute constants with manual inlines of the functions below: -static const float bloom_diff_thresh = 1.0/256.0; - - - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float get_min_sigma_to_blur_triad(const float triad_size, - const float thresh) -{ - // Requires: 1.) triad_size is the final phosphor triad size in pixels - // 2.) thresh is the max desired pixel difference in the - // blurred triad (e.g. 1.0/256.0). - // Returns: Return the minimum sigma that will fully blur a phosphor - // triad on the screen to an even color, within thresh. - // This closed-form function was found by curve-fitting data. - // Estimate: max error = ~0.086036, mean sq. error = ~0.0013387: - return -0.05168 + 0.6113*triad_size - - 1.122*triad_size*sqrt(0.000416 + thresh); - // Estimate: max error = ~0.16486, mean sq. error = ~0.0041041: - //return 0.5985*triad_size - triad_size*sqrt(thresh) -} - -float get_absolute_scale_blur_sigma(const float thresh) -{ - // Requires: 1.) min_expected_triads must be a global float. 
The number - // of horizontal phosphor triads in the final image must be - // >= min_allowed_viewport_triads.x for realistic results. - // 2.) bloom_approx_scale_x must be a global float equal to the - // absolute horizontal scale of BLOOM_APPROX. - // 3.) bloom_approx_scale_x/min_allowed_viewport_triads.x - // should be <= 1.1658025090 to keep the final result < - // 0.62666015625 (the largest sigma ensuring the largest - // unused texel weight stays < 1.0/256.0 for a 3x3 blur). - // 4.) thresh is the max desired pixel difference in the - // blurred triad (e.g. 1.0/256.0). - // Returns: Return the minimum Gaussian sigma that will blur the pass - // output as much as it would have taken to blur away - // bloom_approx_scale_x horizontal phosphor triads. - // Description: - // BLOOM_APPROX should look like a downscaled phosphor blur. Ideally, we'd - // use the same blur sigma as the actual phosphor bloom and scale it down - // to the current resolution with (bloom_approx_scale_x/viewport_size_x), but - // we don't know the viewport size in this pass. Instead, we'll blur as - // much as it would take to blur away min_allowed_viewport_triads.x. This - // will blur "more than necessary" if the user actually uses more triads, - // but that's not terrible either, because blurring a constant fraction of - // the viewport may better resemble a true optical bloom anyway (since the - // viewport will generally be about the same fraction of each player's - // field of view, regardless of screen size and resolution). - // Assume an extremely large viewport size for asymptotic results. - return bloom_approx_scale_x/max_viewport_size_x * - get_min_sigma_to_blur_triad( - max_viewport_size_x/min_allowed_viewport_triads.x, thresh); -} - -float get_center_weight(const float sigma) -{ - // Given a Gaussian blur sigma, get the blur weight for the center texel. 
- #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA - return get_fast_gaussian_weight_sum_inv(sigma); - #else - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - const float w16 = exp(-256.0 * denom_inv); - const float w17 = exp(-289.0 * denom_inv); - const float w18 = exp(-324.0 * denom_inv); - const float w19 = exp(-361.0 * denom_inv); - const float w20 = exp(-400.0 * denom_inv); - const float w21 = exp(-441.0 * denom_inv); - // Note: If the implementation uses a smaller blur than the max allowed, - // the worst case scenario is that the center weight will be overestimated, - // so we'll put a bit more energy into the brightpass...no huge deal. 
- // Then again, if the implementation uses a larger blur than the max - // "allowed" because of dynamic branching, the center weight could be - // underestimated, which is more of a problem...consider always using - #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - // 43x blur: - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + - w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21)); - #else - #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - // 31x blur: - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + - w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15)); - #else - #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - // 25x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12)); - #else - #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - // 17x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8)); - #else - // 9x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - #endif // _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - #endif // _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - #endif // _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - #endif // _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - const float center_weight = weight_sum_inv * weight_sum_inv; - return center_weight; - #endif -} - -float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // If sigma is static, we can safely branch and use the smallest blur - // that's big enough. Ignore #define hints, because we'll only use a - // large blur if we actually need it, and the branches cost nothing. 
- #if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA - #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - #else - // It's still worth branching if the profile supports dynamic branches: - // It's much faster than using a hugely excessive blur, but each branch - // eats ~1% FPS. - #if _DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - #endif - #endif - // Failed optimization notes: - // I originally created a same-size mipmapped 5-tap separable blur10 that - // could handle any sigma by reaching into lower mip levels. It was - // as fast as blur25fast for runtime sigmas and a tad faster than - // blur31fast for static sigmas, but mipmapping two viewport-size passes - // ate 10% of FPS across all codepaths, so it wasn't worth it. - #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - if(sigma <= blur9_std_dev) - { - return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma); - } - else if(sigma <= blur17_std_dev) - { - return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma); - } - else if(sigma <= blur25_std_dev) - { - return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma); - } - else if(sigma <= blur31_std_dev) - { - return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma); - } - else - { - return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma); - } - #else - // If we can't afford to branch, we can only guess at what blur - // size we need. Therefore, use the largest blur allowed. 
- #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma); - #else - #if PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma); - #else - return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma); - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - #endif // PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE -} - -float get_bloom_approx_sigma(const float output_size_x_runtime, - const float estimated_viewport_size_x) -{ - // Requires: 1.) output_size_x_runtime == BLOOM_APPROX.output_size.x. - // This is included for dynamic codepaths just in case the - // following two globals are incorrect: - // 2.) bloom_approx_size_x_for_skip should == the same - // if PHOSPHOR_BLOOM_FAKE is #defined - // 3.) bloom_approx_size_x should == the same otherwise - // Returns: For gaussian4x4, return a dynamic small bloom sigma that's - // as close to optimal as possible given available information. - // For blur3x3, return the a static small bloom sigma that - // works well for typical cases. Otherwise, we're using simple - // bilinear filtering, so use static calculations. - // Assume the default static value. This is a compromise that ensures - // typical triads are blurred, even if unusually large ones aren't. 
- static const float mask_num_triads_static = - max(min_allowed_viewport_triads.x, mask_num_triads_across_static); - const float mask_num_triads_from_size = - estimated_viewport_size_x/mask_triad_width; - const float mask_num_triads_runtime = max(min_allowed_viewport_triads.x, - lerp(mask_num_triads_from_size, mask_num_triads_across, - mask_size_param)); - // Assume an extremely large viewport size for asymptotic results: - static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0); - if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize - { - // Use the runtime num triads and output size: - const float asymptotic_triad_size = - max_viewport_size_x/mask_num_triads_runtime; - const float asymptotic_sigma = get_min_sigma_to_blur_triad( - asymptotic_triad_size, bloom_diff_thresh); - const float bloom_approx_sigma = - asymptotic_sigma * output_size_x_runtime/max_viewport_size_x; - // The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but - // account for the Gaussian scanline sigma from the last pass too. - // The bloom will be too wide horizontally but tall enough vertically. - return length(float2(bloom_approx_sigma, gaussian_beam_max_sigma)); - } - else // 3x3 blur resize (the bilinear resize doesn't need a sigma) - { - // We're either using blur3x3 or bilinear filtering. The biggest - // reason to choose blur3x3 is to avoid dynamic weights, so use a - // static calculation. 
- #ifdef PHOSPHOR_BLOOM_FAKE - static const float output_size_x_static = - bloom_approx_size_x_for_fake; - #else - static const float output_size_x_static = bloom_approx_size_x; - #endif - static const float asymptotic_triad_size = - max_viewport_size_x/mask_num_triads_static; - const float asymptotic_sigma = get_min_sigma_to_blur_triad( - asymptotic_triad_size, bloom_diff_thresh); - const float bloom_approx_sigma = - asymptotic_sigma * output_size_x_static/max_viewport_size_x; - // The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but - // try accounting for the Gaussian scanline sigma from the last pass - // too; use the static default value: - return length(float2(bloom_approx_sigma, gaussian_beam_max_sigma_static)); - } -} - -float get_final_bloom_sigma(const float bloom_sigma_runtime) -{ - // Requires: 1.) bloom_sigma_runtime is a precalculated sigma that's - // optimal for the [known] triad size. - // 2.) Call this from a fragment shader (not a vertex shader), - // or blurring with static sigmas won't be constant-folded. - // Returns: Return the optimistic static sigma if the triad size is - // known at compile time. Otherwise return the optimal runtime - // sigma (10% slower) or an implementation-specific compromise - // between an optimistic or pessimistic static sigma. - // Notes: Call this from the fragment shader, NOT the vertex shader, - // so static sigmas can be constant-folded! 
- const float bloom_sigma_optimistic = get_min_sigma_to_blur_triad( - mask_triad_width_static, bloom_diff_thresh); - #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA - return bloom_sigma_runtime; - #else - // Overblurring looks as bad as underblurring, so assume average-size - // triads, not worst-case huge triads: - return bloom_sigma_optimistic; - #endif -} - - -#endif // _BLOOM_FUNCTIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/blur-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/blur-functions.fxh deleted file mode 100644 index 2d81bfc03..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/blur-functions.fxh +++ /dev/null @@ -1,1966 +0,0 @@ -#ifndef _BLUR_FUNCTIONS_H -#define _BLUR_FUNCTIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file provides reusable one-pass and separable (two-pass) blurs. -// Requires: All blurs share these requirements (dxdy requirement is split): -// 1.) All requirements of gamma-management.h must be satisfied! -// 2.) filter_linearN must == "true" in your .cgp preset unless -// you're using tex2DblurNresize at 1x scale. -// 3.) mipmap_inputN must == "true" in your .cgp preset if -// output_size < video_size. -// 4.) output_size == video_size / pow(2, M), where M is some -// positive integer. tex2Dblur*resize can resize arbitrarily -// (and the blur will be done after resizing), but arbitrary -// resizes "fail" with other blurs due to the way they mix -// static weights with bilinear sample exploitation. -// 5.) In general, dxdy should contain the uv pixel spacing: -// dxdy = (video_size/output_size)/texture_size -// 6.) For separable blurs (tex2DblurNresize and tex2DblurNfast), -// zero out the dxdy component in the unblurred dimension: -// dxdy = float2(dxdy.x, 0.0) or float2(0.0, dxdy.y) -// Many blurs share these requirements: -// 1.) One-pass blurs require scale_xN == scale_yN or scales > 1.0, -// or they will blur more in the lower-scaled dimension. -// 2.) One-pass shared sample blurs require ddx(), ddy(), and -// tex2Dlod() to be supported by the current Cg profile, and -// the drivers must support high-quality derivatives. -// 3.) One-pass shared sample blurs require: -// tex_uv.w == log2(video_size/output_size).y; -// Non-wrapper blurs share this requirement: -// 1.) 
sigma is the intended standard deviation of the blur -// Wrapper blurs share this requirement, which is automatically -// met (unless OVERRIDE_BLUR_STD_DEVS is #defined; see below): -// 1.) blurN_std_dev must be global static const float values -// specifying standard deviations for Nx blurs in units -// of destination pixels -// Optional: 1.) The including file (or an earlier included file) may -// optionally #define USE_BINOMIAL_BLUR_STD_DEVS to replace -// default standard deviations with those matching a binomial -// distribution. (See below for details/properties.) -// 2.) The including file (or an earlier included file) may -// optionally #define OVERRIDE_BLUR_STD_DEVS and override: -// static const float blur3_std_dev -// static const float blur4_std_dev -// static const float blur5_std_dev -// static const float blur6_std_dev -// static const float blur7_std_dev -// static const float blur8_std_dev -// static const float blur9_std_dev -// static const float blur10_std_dev -// static const float blur11_std_dev -// static const float blur12_std_dev -// static const float blur17_std_dev -// static const float blur25_std_dev -// static const float blur31_std_dev -// static const float blur43_std_dev -// 3.) The including file (or an earlier included file) may -// optionally #define OVERRIDE_ERROR_BLURRING and override: -// static const float error_blurring -// This tuning value helps mitigate weighting errors from one- -// pass shared-sample blurs sharing bilinear samples between -// fragments. Values closer to 0.0 have "correct" blurriness -// but allow more artifacts, and values closer to 1.0 blur away -// artifacts by sampling closer to halfway between texels. -// UPDATE 6/21/14: The above static constants may now be overridden -// by non-static uniform constants. This permits exposing blur -// standard deviations as runtime GUI shader parameters. 
However, -// using them keeps weights from being statically computed, and the -// speed hit depends on the blur: On my machine, uniforms kill over -// 53% of the framerate with tex2Dblur12x12shared, but they only -// drop the framerate by about 18% with tex2Dblur11fast. -// Quality and Performance Comparisons: -// For the purposes of the following discussion, "no sRGB" means -// GAMMA_ENCODE_EVERY_FBO is #defined, and "sRGB" means it isn't. -// 1.) tex2DblurNfast is always faster than tex2DblurNresize. -// 2.) tex2DblurNresize functions are the only ones that can arbitrarily resize -// well, because they're the only ones that don't exploit bilinear samples. -// This also means they're the only functions which can be truly gamma- -// correct without linear (or sRGB FBO) input, but only at 1x scale. -// 3.) One-pass shared sample blurs only have a speed advantage without sRGB. -// They also have some inaccuracies due to their shared-[bilinear-]sample -// design, which grow increasingly bothersome for smaller blurs and higher- -// frequency source images (relative to their resolution). I had high -// hopes for them, but their most realistic use case is limited to quickly -// reblurring an already blurred input at full resolution. Otherwise: -// a.) If you're blurring a low-resolution source, you want a better blur. -// b.) If you're blurring a lower mipmap, you want a better blur. -// c.) If you're blurring a high-resolution, high-frequency source, you -// want a better blur. -// 4.) The one-pass blurs without shared samples grow slower for larger blurs, -// but they're competitive with separable blurs at 5x5 and smaller, and -// even tex2Dblur7x7 isn't bad if you're wanting to conserve passes. -// Here are some framerates from a GeForce 8800GTS. The first pass resizes to -// viewport size (4x in this test) and linearizes for sRGB codepaths, and the -// remaining passes perform 6 full blurs. 
Mipmapped tests are performed at the -// same scale, so they just measure the cost of mipmapping each FBO (only every -// other FBO is mipmapped for separable blurs, to mimic realistic usage). -// Mipmap Neither sRGB+Mipmap sRGB Function -// 76.0 92.3 131.3 193.7 tex2Dblur3fast -// 63.2 74.4 122.4 175.5 tex2Dblur3resize -// 93.7 121.2 159.3 263.2 tex2Dblur3x3 -// 59.7 68.7 115.4 162.1 tex2Dblur3x3resize -// 63.2 74.4 122.4 175.5 tex2Dblur5fast -// 49.3 54.8 100.0 132.7 tex2Dblur5resize -// 59.7 68.7 115.4 162.1 tex2Dblur5x5 -// 64.9 77.2 99.1 137.2 tex2Dblur6x6shared -// 55.8 63.7 110.4 151.8 tex2Dblur7fast -// 39.8 43.9 83.9 105.8 tex2Dblur7resize -// 40.0 44.2 83.2 104.9 tex2Dblur7x7 -// 56.4 65.5 71.9 87.9 tex2Dblur8x8shared -// 49.3 55.1 99.9 132.5 tex2Dblur9fast -// 33.3 36.2 72.4 88.0 tex2Dblur9resize -// 27.8 29.7 61.3 72.2 tex2Dblur9x9 -// 37.2 41.1 52.6 60.2 tex2Dblur10x10shared -// 44.4 49.5 91.3 117.8 tex2Dblur11fast -// 28.8 30.8 63.6 75.4 tex2Dblur11resize -// 33.6 36.5 40.9 45.5 tex2Dblur12x12shared -// TODO: Fill in benchmarks for new untested blurs. -// tex2Dblur17fast -// tex2Dblur25fast -// tex2Dblur31fast -// tex2Dblur43fast -// tex2Dblur3x3resize - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// Set static standard deviations, but allow users to override them with their -// own constants (even non-static uniforms if they're okay with the speed hit): -#ifndef OVERRIDE_BLUR_STD_DEVS - // blurN_std_dev values are specified in terms of dxdy strides. - #ifdef USE_BINOMIAL_BLUR_STD_DEVS - // By request, we can define standard deviations corresponding to a - // binomial distribution with p = 0.5 (related to Pascal's triangle). - // This distribution works such that blurring multiple times should - // have the same result as a single larger blur. These values are - // larger than default for blurs up to 6x and smaller thereafter. 
- static const float blur3_std_dev = 0.84931640625; - static const float blur4_std_dev = 0.84931640625; - static const float blur5_std_dev = 1.0595703125; - static const float blur6_std_dev = 1.06591796875; - static const float blur7_std_dev = 1.17041015625; - static const float blur8_std_dev = 1.1720703125; - static const float blur9_std_dev = 1.2259765625; - static const float blur10_std_dev = 1.21982421875; - static const float blur11_std_dev = 1.25361328125; - static const float blur12_std_dev = 1.2423828125; - static const float blur17_std_dev = 1.27783203125; - static const float blur25_std_dev = 1.2810546875; - static const float blur31_std_dev = 1.28125; - static const float blur43_std_dev = 1.28125; - #else - // The defaults are the largest values that keep the largest unused - // blur term on each side <= 1.0/256.0. (We could get away with more - // or be more conservative, but this compromise is pretty reasonable.) - static const float blur3_std_dev = 0.62666015625; - static const float blur4_std_dev = 0.66171875; - static const float blur5_std_dev = 0.9845703125; - static const float blur6_std_dev = 1.02626953125; - static const float blur7_std_dev = 1.36103515625; - static const float blur8_std_dev = 1.4080078125; - static const float blur9_std_dev = 1.7533203125; - static const float blur10_std_dev = 1.80478515625; - static const float blur11_std_dev = 2.15986328125; - static const float blur12_std_dev = 2.215234375; - static const float blur17_std_dev = 3.45535583496; - static const float blur25_std_dev = 5.3409576416; - static const float blur31_std_dev = 6.86488037109; - static const float blur43_std_dev = 10.1852050781; - #endif // USE_BINOMIAL_BLUR_STD_DEVS -#endif // OVERRIDE_BLUR_STD_DEVS - -#ifndef OVERRIDE_ERROR_BLURRING - // error_blurring should be in [0.0, 1.0]. Higher values reduce ringing - // in shared-sample blurs but increase blurring and feature shifting. 
- static const float error_blurring = 0.5; -#endif - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -// gamma-management.fxh relies on pass-specific settings to guide its behavior: -// FIRST_PASS, LAST_PASS, GAMMA_ENCODE_EVERY_FBO, etc. See it for details. -//#include "gamma-management.h" - - -#include "gamma-management.fxh" -#include "quad-pixel-communication.fxh" -#include "special-functions.fxh" - -//////////////////////////////// END INCLUDES //////////////////////////////// - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float4 uv2_to_uv4(float2 tex_uv) -{ - // Make a float2 uv offset safe for adding to float4 tex2Dlod coords: - return float4(tex_uv, 0.0, 0.0); -} - -// Make a length squared helper macro (for usage with static constants): -#define LENGTH_SQ(vec) (dot(vec, vec)) - -float get_fast_gaussian_weight_sum_inv(const float sigma) -{ - // We can use the Gaussian integral to calculate the asymptotic weight for - // the center pixel. Since the unnormalized center pixel weight is 1.0, - // the normalized weight is the same as the weight sum inverse. Given a - // large enough blur (9+), the asymptotic weight sum is close and faster: - // center_weight = 0.5 * - // (erf(0.5/(sigma*sqrt(2.0))) - erf(-0.5/(sigma*sqrt(2.0)))) - // erf(-x) == -erf(x), so this simplifies to erf(0.5/(sigma*sqrt(2.0))). - // However, we can get even faster results with curve-fitting. These are - // also closer than the asymptotic results, because they were constructed - // from 64 blur sizes from [3, 131) and 255 equally-spaced sigmas from - // (0, blurN_std_dev), so the results for smaller sigmas are biased toward - // smaller blurs. The max error is 0.0031793913. - // Relative FPS: 134.3 with erf, 135.8 with curve-fitting.
- //static const float temp = 0.5/sqrt(2.0); - //return erf(temp/sigma); - return min(exp(exp(0.348348412457428/ - (sigma - 0.0860587260734721))), 0.399334576340352/sigma); -} - - -//////////////////// ARBITRARILY RESIZABLE SEPARABLE BLURS /////////////////// - -float3 tex2Dblur11resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 11x Gaussian blurred texture lookup using an 11-tap blur. - // It may be mipmapped depending on settings and dxdy. - // Calculate Gaussian blur kernel weights and a normalization factor for - // distances of 0-5, ignoring constant factors (since we're normalizing). - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5)); - // Statically normalize weights, sum weighted samples, and return. Blurs are - // currently optimized for dynamic weights.
- float3 sum = float3(0.0,0.0,0.0); - sum += w5 * tex2D_linearize(tex, tex_uv - 5.0 * dxdy, input_gamma).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv - 4.0 * dxdy, input_gamma).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy, input_gamma).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy, input_gamma).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv + 4.0 * dxdy, input_gamma).rgb; - sum += w5 * tex2D_linearize(tex, tex_uv + 5.0 * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur9resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 9x Gaussian blurred texture lookup using a 9-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w4 * tex2D_linearize(tex, tex_uv - 4.0 * dxdy, input_gamma).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy, input_gamma).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy, input_gamma).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv + 4.0 * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur7resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 7x Gaussian blurred texture lookup using a 7-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy, input_gamma).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur5resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 5x Gaussian blurred texture lookup using a 5-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy, input_gamma).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 3x Gaussian blurred texture lookup using a 3-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * w1); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - - -/////////////////////////// FAST SEPARABLE BLURS /////////////////////////// - -float3 tex2Dblur11fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: 1.) Global requirements must be met (see file description). - // 2.) filter_linearN must = "true" in your .cgp file. - // 3.) 
For gamma-correct bilinear filtering, global - // gamma_aware_bilinear == true (from gamma-management.h) - // Returns: A 1D 11x Gaussian blurred texture lookup using 6 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5)); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. - const float w01 = w0 * 0.5 + w1; - const float w23 = w2 + w3; - const float w45 = w4 + w5; - const float w01_ratio = w1/w01; - const float w23_ratio = w3/w23; - const float w45_ratio = w5/w45; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w45 * tex2D_linearize(tex, tex_uv - (4.0 + w45_ratio) * dxdy, input_gamma).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv - (2.0 + w23_ratio) * dxdy, input_gamma).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv - w01_ratio * dxdy, input_gamma).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv + w01_ratio * dxdy, input_gamma).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv + (2.0 + w23_ratio) * dxdy, input_gamma).rgb; - sum += w45 * tex2D_linearize(tex, tex_uv + (4.0 + w45_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur9fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 9x Gaussian blurred texture lookup using 1 nearest - // neighbor and 4 linear taps.
It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - // Calculate combined weights and linear sample ratios between texel pairs. - const float w12 = w1 + w2; - const float w34 = w3 + w4; - const float w12_ratio = w2/w12; - const float w34_ratio = w4/w34; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w34 * tex2D_linearize(tex, tex_uv - (3.0 + w34_ratio) * dxdy, input_gamma).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv - (1.0 + w12_ratio) * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv + (1.0 + w12_ratio) * dxdy, input_gamma).rgb; - sum += w34 * tex2D_linearize(tex, tex_uv + (3.0 + w34_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur7fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 7x Gaussian blurred texture lookup using 4 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3)); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight.
- const float w01 = w0 * 0.5 + w1; - const float w23 = w2 + w3; - const float w01_ratio = w1/w01; - const float w23_ratio = w3/w23; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w23 * tex2D_linearize(tex, tex_uv - (2.0 + w23_ratio) * dxdy, input_gamma).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv - w01_ratio * dxdy, input_gamma).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv + w01_ratio * dxdy, input_gamma).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv + (2.0 + w23_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur5fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 5x Gaussian blurred texture lookup using 1 nearest - // neighbor and 2 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2)); - // Calculate combined weights and linear sample ratios between texel pairs. - const float w12 = w1 + w2; - const float w12_ratio = w2/w12; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w12 * tex2D_linearize(tex, tex_uv - (1.0 + w12_ratio) * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv + (1.0 + w12_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur3fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 3x Gaussian blurred texture lookup using 2 linear - // taps.
It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * w1); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. - const float w01 = w0 * 0.5 + w1; - const float w01_ratio = w1/w01; - // Weights for all samples are the same (w01 * weight_sum_inv == 0.5), so - // just average them; weight_sum_inv itself is therefore left unused: - return 0.5 * ( - tex2D_linearize(tex, tex_uv - w01_ratio * dxdy, input_gamma).rgb + - tex2D_linearize(tex, tex_uv + w01_ratio * dxdy, input_gamma).rgb); -} - - -//////////////////////////// HUGE SEPARABLE BLURS //////////////////////////// - -// Huge separable blurs come only in "fast" versions. -float3 tex2Dblur43fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 43x Gaussian blurred texture lookup using 22 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above.
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - const float w16 = exp(-256.0 * denom_inv); - const float w17 = exp(-289.0 * denom_inv); - const float w18 = exp(-324.0 * denom_inv); - const float w19 = exp(-361.0 * denom_inv); - const float w20 = exp(-400.0 * denom_inv); - const float w21 = exp(-441.0 * denom_inv); - //const float weight_sum_inv = 1.0 / - // (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + - // w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w0_1 = w0 * 0.5 + w1; - const float w2_3 = w2 + w3; - const float w4_5 = w4 + w5; - const float w6_7 = w6 + w7; - const float w8_9 = w8 + w9; - const float w10_11 = w10 + w11; - const float w12_13 = w12 + w13; - const float w14_15 = w14 + w15; - const float w16_17 = w16 + w17; - const float w18_19 = w18 + w19; - const float w20_21 = w20 + w21; - const float w0_1_ratio = w1/w0_1; - const float w2_3_ratio = w3/w2_3; - const float w4_5_ratio = w5/w4_5; - const float w6_7_ratio = w7/w6_7; - const float w8_9_ratio = w9/w8_9; - const float w10_11_ratio = w11/w10_11; - const float w12_13_ratio = w13/w12_13; - const float w14_15_ratio = w15/w14_15; - const float w16_17_ratio = w17/w16_17; - const float w18_19_ratio = w19/w18_19; - const float w20_21_ratio = w21/w20_21; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w20_21 * tex2D_linearize(tex, tex_uv - (20.0 + w20_21_ratio) * dxdy, input_gamma).rgb; - sum += w18_19 * tex2D_linearize(tex, tex_uv - (18.0 + w18_19_ratio) * dxdy, input_gamma).rgb; - sum += w16_17 * tex2D_linearize(tex, tex_uv - (16.0 + w16_17_ratio) * dxdy, input_gamma).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv - (14.0 + w14_15_ratio) * dxdy, input_gamma).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv - (12.0 + w12_13_ratio) * dxdy, input_gamma).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv - (10.0 + w10_11_ratio) * dxdy, input_gamma).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv - (8.0 + w8_9_ratio) * dxdy, input_gamma).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv - (6.0 + w6_7_ratio) * dxdy, input_gamma).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv - (4.0 + w4_5_ratio) * dxdy, input_gamma).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv - (2.0 + w2_3_ratio) * dxdy, input_gamma).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv - w0_1_ratio * dxdy, input_gamma).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv + w0_1_ratio * dxdy, input_gamma).rgb; - 
sum += w2_3 * tex2D_linearize(tex, tex_uv + (2.0 + w2_3_ratio) * dxdy, input_gamma).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv + (4.0 + w4_5_ratio) * dxdy, input_gamma).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv + (6.0 + w6_7_ratio) * dxdy, input_gamma).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv + (8.0 + w8_9_ratio) * dxdy, input_gamma).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv + (10.0 + w10_11_ratio) * dxdy, input_gamma).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv + (12.0 + w12_13_ratio) * dxdy, input_gamma).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv + (14.0 + w14_15_ratio) * dxdy, input_gamma).rgb; - sum += w16_17 * tex2D_linearize(tex, tex_uv + (16.0 + w16_17_ratio) * dxdy, input_gamma).rgb; - sum += w18_19 * tex2D_linearize(tex, tex_uv + (18.0 + w18_19_ratio) * dxdy, input_gamma).rgb; - sum += w20_21 * tex2D_linearize(tex, tex_uv + (20.0 + w20_21_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur31fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 31x Gaussian blurred texture lookup using 16 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above.
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - //const float weight_sum_inv = 1.0 / - // (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + - // w9 + w10 + w11 + w12 + w13 + w14 + w15)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w0_1 = w0 * 0.5 + w1; - const float w2_3 = w2 + w3; - const float w4_5 = w4 + w5; - const float w6_7 = w6 + w7; - const float w8_9 = w8 + w9; - const float w10_11 = w10 + w11; - const float w12_13 = w12 + w13; - const float w14_15 = w14 + w15; - const float w0_1_ratio = w1/w0_1; - const float w2_3_ratio = w3/w2_3; - const float w4_5_ratio = w5/w4_5; - const float w6_7_ratio = w7/w6_7; - const float w8_9_ratio = w9/w8_9; - const float w10_11_ratio = w11/w10_11; - const float w12_13_ratio = w13/w12_13; - const float w14_15_ratio = w15/w14_15; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w14_15 * tex2D_linearize(tex, tex_uv - (14.0 + w14_15_ratio) * dxdy, input_gamma).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv - (12.0 + w12_13_ratio) * dxdy, input_gamma).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv - (10.0 + w10_11_ratio) * dxdy, input_gamma).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv - (8.0 + w8_9_ratio) * dxdy, input_gamma).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv - (6.0 + w6_7_ratio) * dxdy, input_gamma).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv - (4.0 + w4_5_ratio) * dxdy, input_gamma).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv - (2.0 + w2_3_ratio) * dxdy, input_gamma).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv - w0_1_ratio * dxdy, input_gamma).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv + w0_1_ratio * dxdy, input_gamma).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv + (2.0 + w2_3_ratio) * dxdy, input_gamma).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv + (4.0 + w4_5_ratio) * dxdy, input_gamma).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv + (6.0 + w6_7_ratio) * dxdy, input_gamma).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv + (8.0 + w8_9_ratio) * dxdy, input_gamma).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv + (10.0 + w10_11_ratio) * dxdy, input_gamma).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv + (12.0 + 
w12_13_ratio) * dxdy, input_gamma).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv + (14.0 + w14_15_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur25fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 25x Gaussian blurred texture lookup using 1 nearest - // neighbor and 12 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - //const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - // w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs.
- const float w1_2 = w1 + w2; - const float w3_4 = w3 + w4; - const float w5_6 = w5 + w6; - const float w7_8 = w7 + w8; - const float w9_10 = w9 + w10; - const float w11_12 = w11 + w12; - const float w1_2_ratio = w2/w1_2; - const float w3_4_ratio = w4/w3_4; - const float w5_6_ratio = w6/w5_6; - const float w7_8_ratio = w8/w7_8; - const float w9_10_ratio = w10/w9_10; - const float w11_12_ratio = w12/w11_12; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w11_12 * tex2D_linearize(tex, tex_uv - (11.0 + w11_12_ratio) * dxdy, input_gamma).rgb; - sum += w9_10 * tex2D_linearize(tex, tex_uv - (9.0 + w9_10_ratio) * dxdy, input_gamma).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv - (7.0 + w7_8_ratio) * dxdy, input_gamma).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv - (5.0 + w5_6_ratio) * dxdy, input_gamma).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv - (3.0 + w3_4_ratio) * dxdy, input_gamma).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv - (1.0 + w1_2_ratio) * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv + (1.0 + w1_2_ratio) * dxdy, input_gamma).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv + (3.0 + w3_4_ratio) * dxdy, input_gamma).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv + (5.0 + w5_6_ratio) * dxdy, input_gamma).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv + (7.0 + w7_8_ratio) * dxdy, input_gamma).rgb; - sum += w9_10 * tex2D_linearize(tex, tex_uv + (9.0 + w9_10_ratio) * dxdy, input_gamma).rgb; - sum += w11_12 * tex2D_linearize(tex, tex_uv + (11.0 + w11_12_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur17fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Same as tex2Dblur11fast() - // Returns: A 1D 17x Gaussian blurred texture lookup using 1 nearest - // neighbor and 8 linear taps.
It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - //const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - // w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - const float w1_2 = w1 + w2; - const float w3_4 = w3 + w4; - const float w5_6 = w5 + w6; - const float w7_8 = w7 + w8; - const float w1_2_ratio = w2/w1_2; - const float w3_4_ratio = w4/w3_4; - const float w5_6_ratio = w6/w5_6; - const float w7_8_ratio = w8/w7_8; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = float3(0.0,0.0,0.0); - sum += w7_8 * tex2D_linearize(tex, tex_uv - (7.0 + w7_8_ratio) * dxdy, input_gamma).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv - (5.0 + w5_6_ratio) * dxdy, input_gamma).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv - (3.0 + w3_4_ratio) * dxdy, input_gamma).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv - (1.0 + w1_2_ratio) * dxdy, input_gamma).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv, input_gamma).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv + (1.0 + w1_2_ratio) * dxdy, input_gamma).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv + (3.0 + w3_4_ratio) * dxdy, input_gamma).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv + (5.0 + w5_6_ratio) * dxdy, input_gamma).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv + (7.0 + w7_8_ratio) * dxdy, input_gamma).rgb; - return sum * weight_sum_inv; -} - - 
-//////////////////// ARBITRARILY RESIZABLE ONE-PASS BLURS //////////////////// - -float3 tex2Dblur3x3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 3x3 Gaussian blurred mipmapped texture lookup of the - // resized input. - // Description: - // This is the only arbitrarily resizable one-pass blur; tex2Dblur5x5resize - // would perform like tex2Dblur9x9, MUCH slower than tex2Dblur5resize. - const float denom_inv = 0.5/(sigma*sigma); - // Load each sample. We need all 3x3 samples. Quad-pixel communication - // won't help either: This should perform like tex2Dblur5x5, but sharing a - // 4x4 sample field would perform more like tex2Dblur8x8shared (worse). - const float2 sample4_uv = tex_uv; - const float2 dx = float2(dxdy.x, 0.0); - const float2 dy = float2(0.0, dxdy.y); - const float2 sample1_uv = sample4_uv - dy; - const float2 sample7_uv = sample4_uv + dy; - const float3 sample0 = tex2D_linearize(tex, sample1_uv - dx, input_gamma).rgb; - const float3 sample1 = tex2D_linearize(tex, sample1_uv, input_gamma).rgb; - const float3 sample2 = tex2D_linearize(tex, sample1_uv + dx, input_gamma).rgb; - const float3 sample3 = tex2D_linearize(tex, sample4_uv - dx, input_gamma).rgb; - const float3 sample4 = tex2D_linearize(tex, sample4_uv, input_gamma).rgb; - const float3 sample5 = tex2D_linearize(tex, sample4_uv + dx, input_gamma).rgb; - const float3 sample6 = tex2D_linearize(tex, sample7_uv - dx, input_gamma).rgb; - const float3 sample7 = tex2D_linearize(tex, sample7_uv, input_gamma).rgb; - const float3 sample8 = tex2D_linearize(tex, sample7_uv + dx, input_gamma).rgb; - // Statically compute Gaussian sample weights: - const float w4 = 1.0; - const float w1_3_5_7 = exp(-LENGTH_SQ(float2(1.0, 0.0)) * denom_inv); - const float w0_2_6_8 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float weight_sum_inv = 1.0/(w4 + 
4.0 * (w1_3_5_7 + w0_2_6_8)); - // Weight and sum the samples: - const float3 sum = w4 * sample4 + - w1_3_5_7 * (sample1 + sample3 + sample5 + sample7) + - w0_2_6_8 * (sample0 + sample2 + sample6 + sample8); - return sum * weight_sum_inv; -} - - -//////////////////////////// FASTER ONE-PASS BLURS /////////////////////////// - -float3 tex2Dblur9x9(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Perform a 1-pass 9x9 blur with 5x5 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 9x9 Gaussian blurred mipmapped texture lookup composed of - // 5x5 carefully selected bilinear samples. - // Description: - // Perform a 1-pass 9x9 blur with 5x5 bilinear samples. Adjust the - // bilinear sample location to reflect the true Gaussian weights for each - // underlying texel. The following diagram illustrates the relative - // locations of bilinear samples. Each sample with the same number has the - // same weight (notice the symmetry). The letters a, b, c, d distinguish - // quadrants, and the letters U, D, L, R, C (up, down, left, right, center) - // distinguish 1D directions along the line containing the pixel center: - // 6a 5a 2U 5b 6b - // 4a 3a 1U 3b 4b - // 2L 1L 0C 1R 2R - // 4c 3c 1D 3d 4d - // 6c 5c 2D 5d 6d - // The following diagram illustrates the underlying equally spaced texels, - // named after the sample that accesses them and subnamed by their location - // within their 2x2, 2x1, 1x2, or 1x1 texel block: - // 6a4 6a3 5a4 5a3 2U2 5b3 5b4 6b3 6b4 - // 6a2 6a1 5a2 5a1 2U1 5b1 5b2 6b1 6b2 - // 4a4 4a3 3a4 3a3 1U2 3b3 3b4 4b3 4b4 - // 4a2 4a1 3a2 3a1 1U1 3b1 3b2 4b1 4b2 - // 2L2 2L1 1L2 1L1 0C1 1R1 1R2 2R1 2R2 - // 4c2 4c1 3c2 3c1 1D1 3d1 3d2 4d1 4d2 - // 4c4 4c3 3c4 3c3 1D2 3d3 3d4 4d3 4d4 - // 6c2 6c1 5c2 5c1 2D1 5d1 5d2 6d1 6d2 - // 6c4 6c3 5c4 5c3 2D2 5d3 5d4 6d3 6d4 - // Note there is only one C texel and only two texels for each U, D, L, or - // R sample. 
The center sample is effectively a nearest neighbor sample, - // and the U/D/L/R samples use 1D linear filtering. All other texels are - // read with bilinear samples somewhere within their 2x2 texel blocks. - - // COMPUTE TEXTURE COORDS: - // Statically compute sampling offsets within each 2x2 texel block, based - // on 1D sampling ratios between texels [1, 2] and [3, 4] texels away from - // the center, and reuse them independently for both dimensions. Compute - // these offsets based on the relative 1D Gaussian weights of the texels - // in question. (w1off means "Gaussian weight for the texel 1.0 texels - // away from the pixel center," etc.). - const float denom_inv = 0.5/(sigma*sigma); - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float w3off = exp(-9.0 * denom_inv); - const float w4off = exp(-16.0 * denom_inv); - const float texel1to2ratio = w2off/(w1off + w2off); - const float texel3to4ratio = w4off/(w3off + w4off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including x-axis-aligned: - const float2 sample1R_texel_offset = float2(1.0, 0.0) + float2(texel1to2ratio, 0.0); - const float2 sample2R_texel_offset = float2(3.0, 0.0) + float2(texel3to4ratio, 0.0); - const float2 sample3d_texel_offset = float2(1.0, 1.0) + float2(texel1to2ratio, texel1to2ratio); - const float2 sample4d_texel_offset = float2(3.0, 1.0) + float2(texel3to4ratio, texel1to2ratio); - const float2 sample5d_texel_offset = float2(1.0, 3.0) + float2(texel1to2ratio, texel3to4ratio); - const float2 sample6d_texel_offset = float2(3.0, 3.0) + float2(texel3to4ratio, texel3to4ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." 
- const float w1R1 = w1off; - const float w1R2 = w2off; - const float w2R1 = w3off; - const float w2R2 = w4off; - const float w3d1 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w3d2_3d3 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w3d4 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - const float w4d1_5d1 = exp(-LENGTH_SQ(float2(3.0, 1.0)) * denom_inv); - const float w4d2_5d3 = exp(-LENGTH_SQ(float2(4.0, 1.0)) * denom_inv); - const float w4d3_5d2 = exp(-LENGTH_SQ(float2(3.0, 2.0)) * denom_inv); - const float w4d4_5d4 = exp(-LENGTH_SQ(float2(4.0, 2.0)) * denom_inv); - const float w6d1 = exp(-LENGTH_SQ(float2(3.0, 3.0)) * denom_inv); - const float w6d2_6d3 = exp(-LENGTH_SQ(float2(4.0, 3.0)) * denom_inv); - const float w6d4 = exp(-LENGTH_SQ(float2(4.0, 4.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights: - const float w0 = 1.0; - const float w1 = w1R1 + w1R2; - const float w2 = w2R1 + w2R2; - const float w3 = w3d1 + 2.0 * w3d2_3d3 + w3d4; - const float w4 = w4d1_5d1 + w4d2_5d3 + w4d3_5d2 + w4d4_5d4; - const float w5 = w4; - const float w6 = w6d1 + 2.0 * w6d2_6d3 + w6d4; - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = - 1.0/(w0 + 4.0 * (w1 + w2 + w3 + w4 + w5 + w6)); - - // LOAD TEXTURE SAMPLES: - // Load all 25 samples (1 nearest, 8 linear, 16 bilinear) using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - // Sampling order doesn't seem to affect performance, so just be clear: - const float3 sample0C = tex2D_linearize(tex, tex_uv, input_gamma).rgb; - const float3 sample1R = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset, input_gamma).rgb; - const float3 sample1D = tex2D_linearize(tex, tex_uv + dxdy * 
sample1R_texel_offset.yx, input_gamma).rgb; - const float3 sample1L = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset, input_gamma).rgb; - const float3 sample1U = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset.yx, input_gamma).rgb; - const float3 sample2R = tex2D_linearize(tex, tex_uv + dxdy * sample2R_texel_offset, input_gamma).rgb; - const float3 sample2D = tex2D_linearize(tex, tex_uv + dxdy * sample2R_texel_offset.yx, input_gamma).rgb; - const float3 sample2L = tex2D_linearize(tex, tex_uv - dxdy * sample2R_texel_offset, input_gamma).rgb; - const float3 sample2U = tex2D_linearize(tex, tex_uv - dxdy * sample2R_texel_offset.yx, input_gamma).rgb; - const float3 sample3d = tex2D_linearize(tex, tex_uv + dxdy * sample3d_texel_offset, input_gamma).rgb; - const float3 sample3c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample3d_texel_offset, input_gamma).rgb; - const float3 sample3b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample3d_texel_offset, input_gamma).rgb; - const float3 sample3a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample3d_texel_offset, input_gamma).rgb; - const float3 sample4d = tex2D_linearize(tex, tex_uv + dxdy * sample4d_texel_offset, input_gamma).rgb; - const float3 sample4c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample4d_texel_offset, input_gamma).rgb; - const float3 sample4b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample4d_texel_offset, input_gamma).rgb; - const float3 sample4a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample4d_texel_offset, input_gamma).rgb; - const float3 sample5d = tex2D_linearize(tex, tex_uv + dxdy * sample5d_texel_offset, input_gamma).rgb; - const float3 sample5c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample5d_texel_offset, input_gamma).rgb; - const float3 sample5b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample5d_texel_offset, input_gamma).rgb; - const float3 sample5a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample5d_texel_offset, 
input_gamma).rgb; - const float3 sample6d = tex2D_linearize(tex, tex_uv + dxdy * sample6d_texel_offset, input_gamma).rgb; - const float3 sample6c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample6d_texel_offset, input_gamma).rgb; - const float3 sample6b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample6d_texel_offset, input_gamma).rgb; - const float3 sample6a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample6d_texel_offset, input_gamma).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. - float3 sum = w0 * sample0C; - sum += w1 * (sample1R + sample1D + sample1L + sample1U); - sum += w2 * (sample2R + sample2D + sample2L + sample2U); - sum += w3 * (sample3d + sample3c + sample3b + sample3a); - sum += w4 * (sample4d + sample4c + sample4b + sample4a); - sum += w5 * (sample5d + sample5c + sample5b + sample5a); - sum += w6 * (sample6d + sample6c + sample6b + sample6a); - return sum * weight_sum_inv; -} - -float3 tex2Dblur7x7(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Perform a 1-pass 7x7 blur with 5x5 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 7x7 Gaussian blurred mipmapped texture lookup composed of - // 4x4 carefully selected bilinear samples. - // Description: - // First see the descriptions for tex2Dblur9x9() and tex2Dblur7(). This - // blur mixes concepts from both. The sample layout is as follows: - // 4a 3a 3b 4b - // 2a 1a 1b 2b - // 2c 1c 1d 2d - // 4c 3c 3d 4d - // The texel layout is as follows. 
Note that samples 3a/3b, 1a/1b, 1c/1d, - // and 3c/3d share a vertical column of texels, and samples 2a/2c, 1a/1c, - // 1b/1d, and 2b/2d share a horizontal row of texels (all sample1's share - // the center texel): - // 4a4 4a3 3a4 3ab3 3b4 4b3 4b4 - // 4a2 4a1 3a2 3ab1 3b2 4b1 4b2 - // 2a4 2a3 1a4 1ab3 1b4 2b3 2b4 - // 2ac2 2ac1 1ac2 1* 1bd2 2bd1 2bd2 - // 2c4 2c3 1c4 1cd3 1d4 2d3 2d4 - // 4c2 4c1 3c2 3cd1 3d2 4d1 4d2 - // 4c4 4c3 3c4 3cd3 3d4 4d3 4d4 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float w3off = exp(-9.0 * denom_inv); - const float texel0to1ratio = w1off/(w0off * 0.5 + w1off); - const float texel2to3ratio = w3off/(w2off + w3off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including axis-aligned: - const float2 sample1d_texel_offset = float2(texel0to1ratio, texel0to1ratio); - const float2 sample2d_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample3d_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample4d_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." 
- const float w1abcd = 1.0; - const float w1bd2_1cd3 = exp(-LENGTH_SQ(float2(1.0, 0.0)) * denom_inv); - const float w2bd1_3cd1 = exp(-LENGTH_SQ(float2(2.0, 0.0)) * denom_inv); - const float w2bd2_3cd2 = exp(-LENGTH_SQ(float2(3.0, 0.0)) * denom_inv); - const float w1d4 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w2d3_3d2 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w2d4_3d4 = exp(-LENGTH_SQ(float2(3.0, 1.0)) * denom_inv); - const float w4d1 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - const float w4d2_4d3 = exp(-LENGTH_SQ(float2(3.0, 2.0)) * denom_inv); - const float w4d4 = exp(-LENGTH_SQ(float2(3.0, 3.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights. - // Split weights for shared texels between samples sharing them: - const float w1 = w1abcd * 0.25 + w1bd2_1cd3 + w1d4; - const float w2_3 = (w2bd1_3cd1 + w2bd2_3cd2) * 0.5 + w2d3_3d2 + w2d4_3d4; - const float w4 = w4d1 + 2.0 * w4d2_4d3 + w4d4; - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = - 1.0/(4.0 * (w1 + 2.0 * w2_3 + w4)); - - // LOAD TEXTURE SAMPLES: - // Load all 16 samples using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample1a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample1d_texel_offset, input_gamma).rgb; - const float3 sample2a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample2d_texel_offset, input_gamma).rgb; - const float3 sample3a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample3d_texel_offset, input_gamma).rgb; - const float3 sample4a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample4d_texel_offset, input_gamma).rgb; - const float3 sample1b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * 
sample1d_texel_offset, input_gamma).rgb; - const float3 sample2b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample2d_texel_offset, input_gamma).rgb; - const float3 sample3b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample3d_texel_offset, input_gamma).rgb; - const float3 sample4b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample4d_texel_offset, input_gamma).rgb; - const float3 sample1c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample1d_texel_offset, input_gamma).rgb; - const float3 sample2c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample2d_texel_offset, input_gamma).rgb; - const float3 sample3c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample3d_texel_offset, input_gamma).rgb; - const float3 sample4c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample4d_texel_offset, input_gamma).rgb; - const float3 sample1d = tex2D_linearize(tex, tex_uv + dxdy * sample1d_texel_offset, input_gamma).rgb; - const float3 sample2d = tex2D_linearize(tex, tex_uv + dxdy * sample2d_texel_offset, input_gamma).rgb; - const float3 sample3d = tex2D_linearize(tex, tex_uv + dxdy * sample3d_texel_offset, input_gamma).rgb; - const float3 sample4d = tex2D_linearize(tex, tex_uv + dxdy * sample4d_texel_offset, input_gamma).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. - float3 sum = float3(0.0,0.0,0.0); - sum += w1 * (sample1a + sample1b + sample1c + sample1d); - sum += w2_3 * (sample2a + sample2b + sample2c + sample2d); - sum += w2_3 * (sample3a + sample3b + sample3c + sample3d); - sum += w4 * (sample4a + sample4b + sample4c + sample4d); - return sum * weight_sum_inv; -} - -float3 tex2Dblur5x5(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Perform a 1-pass 5x5 blur with 3x3 bilinear samples. 
- // Requires: Same as tex2Dblur9() - // Returns: A 5x5 Gaussian blurred mipmapped texture lookup composed of - // 3x3 carefully selected bilinear samples. - // Description: - // First see the description for tex2Dblur9x9(). This blur uses the same - // concept and sample/texel locations except on a smaller scale. Samples: - // 2a 1U 2b - // 1L 0C 1R - // 2c 1D 2d - // Texels: - // 2a4 2a3 1U2 2b3 2b4 - // 2a2 2a1 1U1 2b1 2b2 - // 1L2 1L1 0C1 1R1 1R2 - // 2c2 2c1 1D1 2d1 2d2 - // 2c4 2c3 1D2 2d3 2d4 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). - const float denom_inv = 0.5/(sigma*sigma); - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float texel1to2ratio = w2off/(w1off + w2off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including x-axis-aligned: - const float2 sample1R_texel_offset = float2(1.0, 0.0) + float2(texel1to2ratio, 0.0); - const float2 sample2d_texel_offset = float2(1.0, 1.0) + float2(texel1to2ratio, texel1to2ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." 
- const float w1R1 = w1off; - const float w1R2 = w2off; - const float w2d1 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w2d2_3 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w2d4 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights: - const float w0 = 1.0; - const float w1 = w1R1 + w1R2; - const float w2 = w2d1 + 2.0 * w2d2_3 + w2d4; - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = 1.0/(w0 + 4.0 * (w1 + w2)); - - // LOAD TEXTURE SAMPLES: - // Load all 9 samples (1 nearest, 4 linear, 4 bilinear) using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample0C = tex2D_linearize(tex, tex_uv, input_gamma).rgb; - const float3 sample1R = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset, input_gamma).rgb; - const float3 sample1D = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset.yx, input_gamma).rgb; - const float3 sample1L = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset, input_gamma).rgb; - const float3 sample1U = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset.yx, input_gamma).rgb; - const float3 sample2d = tex2D_linearize(tex, tex_uv + dxdy * sample2d_texel_offset, input_gamma).rgb; - const float3 sample2c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample2d_texel_offset, input_gamma).rgb; - const float3 sample2b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample2d_texel_offset, input_gamma).rgb; - const float3 sample2a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample2d_texel_offset, input_gamma).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. 
- float3 sum = w0 * sample0C; - sum += w1 * (sample1R + sample1D + sample1L + sample1U); - sum += w2 * (sample2a + sample2b + sample2c + sample2d); - return sum * weight_sum_inv; -} - -float3 tex2Dblur3x3(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma, - const float input_gamma) -{ - // Perform a 1-pass 3x3 blur with 5x5 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 3x3 Gaussian blurred mipmapped texture lookup composed of - // 2x2 carefully selected bilinear samples. - // Description: - // First see the descriptions for tex2Dblur9x9() and tex2Dblur7(). This - // blur mixes concepts from both. The sample layout is as follows: - // 0a 0b - // 0c 0d - // The texel layout is as follows. Note that samples 0a/0b and 0c/0d share - // a vertical column of texels, and samples 0a/0c and 0b/0d share a - // horizontal row of texels (all samples share the center texel): - // 0a3 0ab2 0b3 - // 0ac1 0*0 0bd1 - // 0c3 0cd2 0d3 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w1off = exp(-1.0 * denom_inv); - const float texel0to1ratio = w1off/(w0off * 0.5 + w1off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including axis-aligned: - const float2 sample0d_texel_offset = float2(texel0to1ratio, texel0to1ratio); - - // LOAD TEXTURE SAMPLES: - // Load all 4 samples using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample0a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample0d_texel_offset, input_gamma).rgb; - const float3 sample0b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample0d_texel_offset, input_gamma).rgb; - const float3 sample0c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample0d_texel_offset, input_gamma).rgb; - const float3 sample0d = tex2D_linearize(tex, tex_uv + dxdy * sample0d_texel_offset, input_gamma).rgb; - - // SUM WEIGHTED SAMPLES: - // Weights for all samples are the same, so just average them: - return 0.25 * (sample0a + sample0b + sample0c + sample0d); -} - - -////////////////// LINEAR ONE-PASS BLURS WITH SHARED SAMPLES ///////////////// - -float3 tex2Dblur12x12shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma, - const float input_gamma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: 1.) Same as tex2Dblur9() - // 2.) ddx() and ddy() are present in the current Cg profile. - // 3.) The GPU driver is using fine/high-quality derivatives. - // 4.) 
quad_vector *correctly* describes the current fragment's - // location in its pixel quad, by the conventions noted in - // get_quad_vector[_naive]. - // 5.) tex_uv.w = log2(video_size/output_size).y - // 6.) tex2Dlod() is present in the current Cg profile. - // Optional: Tune artifacts vs. excessive blurriness with the global - // float error_blurring. - // Returns: A blurred texture lookup using a "virtual" 12x12 Gaussian - // blur (a 6x6 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // Perform a 1-pass blur with shared texture lookups across a pixel quad. - // We'll get neighboring samples with high-quality ddx/ddy derivatives, as - // in GPU Pro 2, Chapter VI.2, "Shader Amortization using Pixel Quad - // Message Passing" by Eric Penner. - // - // Our "virtual" 12x12 blur will be comprised of ((6 - 1)^2)/4 + 3 = 12 - // bilinear samples, where bilinear sampling positions are computed from - // the relative Gaussian weights of the 4 surrounding texels. The catch is - // that the appropriate texel weights and sample coords differ for each - // fragment, but we're reusing most of the same samples across a quad of - // destination fragments. (We do use unique coords for the four nearest - // samples at each fragment.) Mixing bilinear filtering and sample-sharing - // therefore introduces some error into the weights, and this can get nasty - // when the source image is small or high-frequency. Computing bilinear - // ratios based on weights at the sample field center results in sharpening - // and ringing artifacts, but we can move samples closer to halfway between - // texels to try blurring away the error (which can move features around by - // a texel or so). Tune this with the global float "error_blurring". - // - // The pixel quad's sample field covers 12x12 texels, accessed through 6x6 - // bilinear (2x2 texel) taps. 
Each fragment depends on a window of 10x10 - // texels (5x5 bilinear taps), and each fragment is responsible for loading - // a 6x6 texel quadrant as a 3x3 block of bilinear taps, plus 3 more taps - // to use unique bilinear coords for sample0* for each fragment. This - // diagram illustrates the relative locations of bilinear samples 1-9 for - // each quadrant a, b, c, d (note samples will not be equally spaced): - // 8a 7a 6a 6b 7b 8b - // 5a 4a 3a 3b 4b 5b - // 2a 1a 0a 0b 1b 2b - // 2c 1c 0c 0d 1d 2d - // 5c 4c 3c 3d 4d 5d - // 8c 7c 6c 6d 7d 8d - // The following diagram illustrates the underlying equally spaced texels, - // named after the sample that accesses them and subnamed by their location - // within their 2x2 texel block: - // 8a3 8a2 7a3 7a2 6a3 6a2 6b2 6b3 7b2 7b3 8b2 8b3 - // 8a1 8a0 7a1 7a0 6a1 6a0 6b0 6b1 7b0 7b1 8b0 8b1 - // 5a3 5a2 4a3 4a2 3a3 3a2 3b2 3b3 4b2 4b3 5b2 5b3 - // 5a1 5a0 4a1 4a0 3a1 3a0 3b0 3b1 4b0 4b1 5b0 5b1 - // 2a3 2a2 1a3 1a2 0a3 0a2 0b2 0b3 1b2 1b3 2b2 2b3 - // 2a1 2a0 1a1 1a0 0a1 0a0 0b0 0b1 1b0 1b1 2b0 2b1 - // 2c1 2c0 1c1 1c0 0c1 0c0 0d0 0d1 1d0 1d1 2d0 2d1 - // 2c3 2c2 1c3 1c2 0c3 0c2 0d2 0d3 1d2 1d3 2d2 2d3 - // 5c1 5c0 4c1 4c0 3c1 3c0 3d0 3d1 4d0 4d1 5d0 5d1 - // 5c3 5c2 4c3 4c2 3c3 3c2 3d2 3d3 4d2 4d3 5d2 5d3 - // 8c1 8c0 7c1 7c0 6c1 6c0 6d0 6d1 7d0 7d1 8d0 8d1 - // 8c3 8c2 7c3 7c2 6c3 6c2 6d2 6d3 7d2 7d3 8d2 8d3 - // With this symmetric arrangement, we don't have to know which absolute - // quadrant a sample lies in to assign kernel weights; it's enough to know - // the sample number and the relative quadrant of the sample (relative to - // the current quadrant): - // {current, adjacent x, adjacent y, diagonal} - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute sampling offsets within each 2x2 texel block, based - // on appropriate 1D Gaussian sampling ratio between texels [0, 1], [2, 3], - // and [4, 5] away from the fragment, and reuse them independently for both - // 
dimensions. Use the sample field center as the estimated destination, - // but nudge the result closer to halfway between texels to blur error. - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float w4_5off = exp(-(4.5*4.5) * denom_inv); - const float w5_5off = exp(-(5.5*5.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - const float texel4to5ratio = lerp(w5_5off/(w4_5off + w5_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(4.0, 0.0) + float2(texel4to5ratio, texel0to1ratio); - const float2 sample3_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const 
float2 sample4_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - const float2 sample5_texel_offset = float2(4.0, 2.0) + float2(texel4to5ratio, texel2to3ratio); - const float2 sample6_texel_offset = float2(0.0, 4.0) + float2(texel0to1ratio, texel4to5ratio); - const float2 sample7_texel_offset = float2(2.0, 4.0) + float2(texel2to3ratio, texel4to5ratio); - const float2 sample8_texel_offset = float2(4.0, 4.0) + float2(texel4to5ratio, texel4to5ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // based on the sum of their 4 underlying texel weights. Assume a same- - // resolution blur, so each symmetrically named sample weight will compute - // the same at every fragment in the pixel quad: We can therefore compute - // texel weights based only on the bottom-right quadrant (fragment at 0d0). - // Too avoid too much boilerplate code, use a macro to get all 4 texel - // weights for a bilinear sample based on the offset of its top-left texel: - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - const float w8diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -6.0); - const float w7diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -6.0); - const float w6diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -6.0); - const float w6adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -6.0); - const float w7adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -6.0); - const float w8adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -6.0); - const float w5diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -4.0); - const float w4diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w4adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w5adjy = 
GET_TEXEL_QUAD_WEIGHTS(4.0, -4.0); - const float w2diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -2.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -2.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 0.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 0.0); - const float w5adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 2.0); - const float w4adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w4curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - const float w5curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 2.0); - const float w8adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 4.0); - const float w7adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 4.0); - const float w6adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 4.0); - const float w6curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 4.0); - const float w7curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 4.0); - const float w8curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 4.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Statically pack weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w2 = float4(w2curr, w2adjx, w2adjy, w2diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - const float4 w4 = float4(w4curr, w4adjx, w4adjy, w4diag); - const float4 w5 = float4(w5curr, w5adjx, w5adjy, w5diag); - const float4 w6 = float4(w6curr, w6adjx, w6adjy, w6diag); - const float4 w7 = float4(w7curr, w7adjx, w7adjy, w7diag); - const float4 w8 = 
float4(w8curr, w8adjx, w8adjy, w8diag); - // Get the weight sum inverse (normalization factor): - const float4 weight_sum4 = w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8; - const float2 weight_sum2 = weight_sum4.xy + weight_sum4.zw; - const float weight_sum = weight_sum2.x + weight_sum2.y; - const float weight_sum_inv = 1.0/(weight_sum); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset, input_gamma).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset, input_gamma).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset, input_gamma).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset, input_gamma).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset), input_gamma).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset), input_gamma).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset), input_gamma).rgb; - const float3 sample4curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample4_texel_offset), input_gamma).rgb; - const float3 sample5curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample5_texel_offset), input_gamma).rgb; - const float3 sample6curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample6_texel_offset), input_gamma).rgb; - const float3 sample7curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample7_texel_offset), input_gamma).rgb; - const float3 sample8curr = tex2Dlod_linearize(tex, 
tex_uv + uv2_to_uv4(dxdy_curr * sample8_texel_offset), input_gamma).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - float3 sample4adjx, sample4adjy, sample4diag; - float3 sample5adjx, sample5adjy, sample5diag; - float3 sample6adjx, sample6adjy, sample6diag; - float3 sample7adjx, sample7adjy, sample7diag; - float3 sample8adjx, sample8adjy, sample8diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - quad_gather(quad_vector, sample4curr, sample4adjx, sample4adjy, sample4diag); - quad_gather(quad_vector, sample5curr, sample5adjx, sample5adjy, sample5diag); - quad_gather(quad_vector, sample6curr, sample6adjx, sample6adjy, sample6diag); - quad_gather(quad_vector, sample7curr, sample7adjx, sample7adjy, sample7diag); - quad_gather(quad_vector, sample8curr, sample8adjx, sample8adjy, sample8diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. 
- // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result: - float3 sum = float3(0.0,0.0,0.0); - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w2, float4x3(sample2curr, sample2adjx, sample2adjy, sample2diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - sum += mul(w4, float4x3(sample4curr, sample4adjx, sample4adjy, sample4diag)); - sum += mul(w5, float4x3(sample5curr, sample5adjx, sample5adjy, sample5diag)); - sum += mul(w6, float4x3(sample6curr, sample6adjx, sample6adjy, sample6diag)); - sum += mul(w7, float4x3(sample7curr, sample7adjx, sample7adjy, sample7diag)); - sum += mul(w8, float4x3(sample8curr, sample8adjx, sample8adjy, sample8diag)); - return sum * weight_sum_inv; -} - -float3 tex2Dblur10x10shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma, - const float input_gamma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 10x10 Gaussian - // blur (a 5x5 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur12x12shared(). This - // function shares the same concept and sample placement, but each fragment - // only uses 25 of the 36 samples taken across the pixel quad (to cover a - // 5x5 sample area, or 10x10 texel area), and it uses a lower standard - // deviation to compensate. 
Thanks to symmetry, the 11 omitted samples - // are always the "same:" - // 8adjx, 2adjx, 5adjx, - // 6adjy, 7adjy, 8adjy, - // 2diag, 5diag, 6diag, 7diag, 8diag - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float w4_5off = exp(-(4.5*4.5) * denom_inv); - const float w5_5off = exp(-(5.5*5.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - const float texel4to5ratio = lerp(w5_5off/(w4_5off + w5_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 
sample2_texel_offset = float2(4.0, 0.0) + float2(texel4to5ratio, texel0to1ratio); - const float2 sample3_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample4_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - const float2 sample5_texel_offset = float2(4.0, 2.0) + float2(texel4to5ratio, texel2to3ratio); - const float2 sample6_texel_offset = float2(0.0, 4.0) + float2(texel0to1ratio, texel4to5ratio); - const float2 sample7_texel_offset = float2(2.0, 4.0) + float2(texel2to3ratio, texel4to5ratio); - const float2 sample8_texel_offset = float2(4.0, 4.0) + float2(texel4to5ratio, texel4to5ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - // We only need 25 of the 36 sample weights. 
Skip the following weights: - // 8adjx, 2adjx, 5adjx, - // 6adjy, 7adjy, 8adjy, - // 2diag, 5diag, 6diag, 7diag, 8diag - const float w4diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w4adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w5adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -4.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -2.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 0.0); - const float w4adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w4curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - const float w5curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 2.0); - const float w7adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 4.0); - const float w6adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 4.0); - const float w6curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 4.0); - const float w7curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 4.0); - const float w8curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 4.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = 1.0/(w0curr + w1curr + w2curr + w3curr + - w4curr + w5curr + w6curr + w7curr + w8curr + - w0adjx + w1adjx + w3adjx + w4adjx + w6adjx + w7adjx + - w0adjy + w1adjy + w2adjy + w3adjy + w4adjy + w5adjy + - w0diag + w1diag + w3diag + w4diag); - // Statically pack most weights for runtime. 
Note the mixed packing: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - const float4 w4 = float4(w4curr, w4adjx, w4adjy, w4diag); - const float4 w2and5 = float4(w2curr, w2adjy, w5curr, w5adjy); - const float4 w6and7 = float4(w6curr, w6adjx, w7curr, w7adjx); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset, input_gamma).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset, input_gamma).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset, input_gamma).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset, input_gamma).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset), input_gamma).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset), input_gamma).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset), input_gamma).rgb; - const float3 sample4curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample4_texel_offset), input_gamma).rgb; - const float3 sample5curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample5_texel_offset), input_gamma).rgb; - const float3 sample6curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample6_texel_offset), input_gamma).rgb; - const float3 sample7curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample7_texel_offset), input_gamma).rgb; - 
const float3 sample8curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample8_texel_offset), input_gamma).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad in order of need: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - float3 sample4adjx, sample4adjy, sample4diag; - float3 sample5adjx, sample5adjy, sample5diag; - float3 sample6adjx, sample6adjy, sample6diag; - float3 sample7adjx, sample7adjy, sample7diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - quad_gather(quad_vector, sample4curr, sample4adjx, sample4adjy, sample4diag); - quad_gather(quad_vector, sample5curr, sample5adjx, sample5adjy, sample5diag); - quad_gather(quad_vector, sample6curr, sample6adjx, sample6adjy, sample6diag); - quad_gather(quad_vector, sample7curr, sample7adjx, sample7adjy, sample7diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. - // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result. 
First do the simple ones: - float3 sum = float3(0.0,0.0,0.0); - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - sum += mul(w4, float4x3(sample4curr, sample4adjx, sample4adjy, sample4diag)); - // Now do the mixed-sample ones: - sum += mul(w2and5, float4x3(sample2curr, sample2adjy, sample5curr, sample5adjy)); - sum += mul(w6and7, float4x3(sample6curr, sample6adjx, sample7curr, sample7adjx)); - sum += w8curr * sample8curr; - // Normalize the sum (so the weights add to 1.0) and return: - return sum * weight_sum_inv; -} - -float3 tex2Dblur8x8shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma, - const float input_gamma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 8x8 Gaussian - // blur (a 4x4 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur12x12shared(). This function - // shares the same concept and a similar sample placement, except each - // quadrant contains 4x4 texels and 2x2 samples instead of 6x6 and 3x3 - // respectively. There could be a total of 16 samples, 4 of which each - // fragment is responsible for, but each fragment loads 0a/0b/0c/0d with - // its own offset to reduce shared sample artifacts, bringing the sample - // count for each fragment to 7. 
Sample placement: - // 3a 2a 2b 3b - // 1a 0a 0b 1b - // 1c 0c 0d 1d - // 3c 2c 2d 3d - // Texel placement: - // 3a3 3a2 2a3 2a2 2b2 2b3 3b2 3b3 - // 3a1 3a0 2a1 2a0 2b0 2b1 3b0 3b1 - // 1a3 1a2 0a3 0a2 0b2 0b3 1b2 1b3 - // 1a1 1a0 0a1 0a0 0b0 0b1 1b0 1b1 - // 1c1 1c0 0c1 0c0 0d0 0d1 1d0 1d1 - // 1c3 1c2 0c3 0c2 0d2 0d3 1d2 1d3 - // 3c1 3c0 2c1 2c0 2d0 2d1 3d0 4d1 - // 3c3 3c2 2c3 2c2 2d2 2d3 3d2 4d3 - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + 
float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample3_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w2diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Statically pack weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w2 = float4(w2curr, w2adjx, w2adjy, w2diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - // Get the weight sum inverse (normalization 
factor): - const float4 weight_sum4 = w0 + w1 + w2 + w3; - const float2 weight_sum2 = weight_sum4.xy + weight_sum4.zw; - const float weight_sum = weight_sum2.x + weight_sum2.y; - const float weight_sum_inv = 1.0/(weight_sum); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset, input_gamma).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset, input_gamma).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset, input_gamma).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset, input_gamma).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset), input_gamma).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset), input_gamma).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset), input_gamma).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. 
- // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result: - float3 sum = float3(0.0,0.0,0.0); - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w2, float4x3(sample2curr, sample2adjx, sample2adjy, sample2diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - return sum * weight_sum_inv; -} - -float3 tex2Dblur6x6shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma, - const float input_gamma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 6x6 Gaussian - // blur (a 3x3 blur of carefully selected bilinear samples) - // of the given mip level. There will be some inaccuracies,subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur8x8shared(). This - // function shares the same concept and sample placement, but each fragment - // only uses 9 of the 16 samples taken across the pixel quad (to cover a - // 3x3 sample area, or 6x6 texel area), and it uses a lower standard - // deviation to compensate. Thanks to symmetry, the 7 omitted samples - // are always the "same:" - // 1adjx, 3adjx - // 2adjy, 3adjy - // 1diag, 2diag, 3diag - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample3_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). 
- #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - // We only need 9 of the 16 sample weights. Skip the following weights: - // 1adjx, 3adjx - // 2adjy, 3adjy - // 1diag, 2diag, 3diag - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = 1.0/(w0curr + w1curr + w2curr + w3curr + - w0adjx + w2adjx + w0adjy + w1adjy + w0diag); - // Statically pack some weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset, input_gamma).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset, input_gamma).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset, input_gamma).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset, input_gamma).rgb; - const float3 
sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset), input_gamma).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset), input_gamma).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset), input_gamma).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. - // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result for sample1*, and handle the rest - // of the weights more directly/verbosely: - float3 sum = float3(0.0,0.0,0.0); - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += w1curr * sample1curr + w1adjy * sample1adjy + w2curr * sample2curr + - w2adjx * sample2adjx + w3curr * sample3curr; - return sum * weight_sum_inv; -} - - -/////////////////////// MAX OPTIMAL SIGMA BLUR WRAPPERS ////////////////////// - -// The following blurs are static wrappers around the dynamic blurs above. -// HOPEFULLY, the compiler will be smart enough to do constant-folding. 
- -// Resizable separable blurs: -float3 tex2Dblur11resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur11resize(tex, tex_uv, dxdy, blur11_std_dev, input_gamma); -} -float3 tex2Dblur9resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur9resize(tex, tex_uv, dxdy, blur9_std_dev, input_gamma); -} -float3 tex2Dblur7resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur7resize(tex, tex_uv, dxdy, blur7_std_dev, input_gamma); -} -float3 tex2Dblur5resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur5resize(tex, tex_uv, dxdy, blur5_std_dev, input_gamma); -} -float3 tex2Dblur3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur3resize(tex, tex_uv, dxdy, blur3_std_dev, input_gamma); -} -// Fast separable blurs: -float3 tex2Dblur11fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur11fast(tex, tex_uv, dxdy, blur11_std_dev, input_gamma); -} -float3 tex2Dblur9fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur9fast(tex, tex_uv, dxdy, blur9_std_dev, input_gamma); -} -float3 tex2Dblur7fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur7fast(tex, tex_uv, dxdy, blur7_std_dev, input_gamma); -} -float3 tex2Dblur5fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur5fast(tex, tex_uv, dxdy, blur5_std_dev, input_gamma); -} -float3 tex2Dblur3fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur3fast(tex, tex_uv, dxdy, blur3_std_dev, input_gamma); -} -// 
Huge, "fast" separable blurs: -float3 tex2Dblur43fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur43fast(tex, tex_uv, dxdy, blur43_std_dev, input_gamma); -} -float3 tex2Dblur31fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur31fast(tex, tex_uv, dxdy, blur31_std_dev, input_gamma); -} -float3 tex2Dblur25fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur25fast(tex, tex_uv, dxdy, blur25_std_dev, input_gamma); -} -float3 tex2Dblur17fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur17fast(tex, tex_uv, dxdy, blur17_std_dev, input_gamma); -} -// Resizable one-pass blurs: -float3 tex2Dblur3x3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur3x3resize(tex, tex_uv, dxdy, blur3_std_dev, input_gamma); -} -// "Fast" one-pass blurs: -float3 tex2Dblur9x9(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur9x9(tex, tex_uv, dxdy, blur9_std_dev, input_gamma); -} -float3 tex2Dblur7x7(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur7x7(tex, tex_uv, dxdy, blur7_std_dev, input_gamma); -} -float3 tex2Dblur5x5(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur5x5(tex, tex_uv, dxdy, blur5_std_dev, input_gamma); -} -float3 tex2Dblur3x3(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, - const float input_gamma) -{ - return tex2Dblur3x3(tex, tex_uv, dxdy, blur3_std_dev, input_gamma); -} -// "Fast" shared-sample one-pass blurs: -float3 tex2Dblur12x12shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float input_gamma) -{ 
- return tex2Dblur12x12shared(tex, tex_uv, dxdy, quad_vector, blur12_std_dev, input_gamma); -} -float3 tex2Dblur10x10shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float input_gamma) -{ - return tex2Dblur10x10shared(tex, tex_uv, dxdy, quad_vector, blur10_std_dev, input_gamma); -} -float3 tex2Dblur8x8shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float input_gamma) -{ - return tex2Dblur8x8shared(tex, tex_uv, dxdy, quad_vector, blur8_std_dev, input_gamma); -} -float3 tex2Dblur6x6shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float input_gamma) -{ - return tex2Dblur6x6shared(tex, tex_uv, dxdy, quad_vector, blur6_std_dev, input_gamma); -} - - -#endif // _BLUR_FUNCTIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/derived-settings-and-constants.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/derived-settings-and-constants.fxh deleted file mode 100644 index 8443c321c..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/derived-settings-and-constants.fxh +++ /dev/null @@ -1,405 +0,0 @@ -#ifndef _DERIVED_SETTINGS_AND_CONSTANTS_H -#define _DERIVED_SETTINGS_AND_CONSTANTS_H - -#include "helper-functions-and-macros.fxh" -#include "user-settings.fxh" - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. 
-// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// These macros and constants can be used across the whole codebase. -// Unlike the values in user-settings.cgh, end users shouldn't modify these. - - -/////////////////////////////// BEGIN INCLUDES /////////////////////////////// - -//#include "../user-settings.h" - -//#include "user-cgp-constants.h" - -///////////////////////// BEGIN USER-CGP-CONSTANTS ///////////////////////// - -#ifndef _USER_CGP_CONSTANTS_H -#define _USER_CGP_CONSTANTS_H - -// IMPORTANT: -// These constants MUST be set appropriately for the settings in crt-royale.cgp -// (or whatever related .cgp file you're using). If they aren't, you're likely -// to get artifacts, the wrong phosphor mask size, etc. I wish these could be -// set directly in the .cgp file to make things easier, but...they can't. - -// PASS SCALES AND RELATED CONSTANTS: -// Copy the absolute scale_x for BLOOM_APPROX. There are two major versions of -// this shader: One does a viewport-scale bloom, and the other skips it. 
The -// latter benefits from a higher bloom_approx_scale_x, so save both separately: -static const float bloom_approx_scale_x = 4.0 / 3.0; -static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0); -static const float bloom_diff_thresh_ = 1.0/256.0; - -static const float bloom_approx_size_x = 320.0; -static const float bloom_approx_size_x_for_fake = 400.0; -// Copy the viewport-relative scales of the phosphor mask resize passes -// (MASK_RESIZE and the pass immediately preceding it): -static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625); -// Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.: -static const float geom_max_aspect_ratio = 4.0/3.0; - -// PHOSPHOR MASK TEXTURE CONSTANTS: -// Set the following constants to reflect the properties of the phosphor mask -// texture named in crt-royale.cgp. The shader optionally resizes a mask tile -// based on user settings, then repeats a single tile until filling the screen. -// The shader must know the input texture size (default 64x64), and to manually -// resize, it must also know the horizontal triads per tile (default 8). -static const float2 mask_texture_small_size = float2(64.0, 64.0); -static const float2 mask_texture_large_size = float2(512.0, 512.0); -static const float mask_triads_per_tile = 8.0; -// We need the average brightness of the phosphor mask to compensate for the -// dimming it causes. The following four values are roughly correct for the -// masks included with the shader. Update the value for any LUT texture you -// change. [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether -// the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15). 
-// #ifndef PHOSPHOR_MASK_GRILLE14 -// #define PHOSPHOR_MASK_GRILLE14 0 -// #endif -static const float mask_grille14_avg_color = 50.6666666/255.0; - // TileableLinearApertureGrille14Wide7d33Spacing*.png - // TileableLinearApertureGrille14Wide10And6Spacing*.png -static const float mask_grille15_avg_color = 53.0/255.0; - // TileableLinearApertureGrille15Wide6d33Spacing*.png - // TileableLinearApertureGrille15Wide8And5d5Spacing*.png -static const float mask_slot_avg_color = 46.0/255.0; - // TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png - // TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png -static const float mask_shadow_avg_color = 41.0/255.0; - // TileableLinearShadowMask*.png - // TileableLinearShadowMaskEDP*.png - -// #if PHOSPHOR_MASK_GRILLE14 -// static const float mask_grille_avg_color = mask_grille14_avg_color; -// #else - static const float mask_grille_avg_color = mask_grille15_avg_color; -// #endif - - -#endif // _USER_CGP_CONSTANTS_H - -////////////////////////// END USER-CGP-CONSTANTS ////////////////////////// - -//////////////////////////////// END INCLUDES //////////////////////////////// - -/////////////////////////////// FIXED SETTINGS /////////////////////////////// - - - -#define _SIMULATE_CRT_ON_LCD 1 -#define _SIMULATE_GBA_ON_LCD 2 -#define _SIMULATE_LCD_ON_CRT 3 -#define _SIMULATE_GBA_ON_CRT 4 - -// Ensure the first pass decodes CRT gamma and the last encodes LCD gamma. -#define GAMMA_SIMULATION_MODE _SIMULATE_CRT_ON_LCD - -// Manually tiling a manually resized texture creates texture coord derivative -// discontinuities and confuses anisotropic filtering, causing discolored tile -// seams in the phosphor mask. Workarounds: -// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's -// downgraded to tex2Dbias without _DRIVERS_ALLOW_TEX2DLOD #defined and -// disabled without _DRIVERS_ALLOW_TEX2DBIAS #defined either. -// b.) 
"Tile flat twice" requires drawing two full tiles without border padding -// to the resized mask FBO, and it's incompatible with same-pass curvature. -// (Same-pass curvature isn't used but could be in the future...maybe.) -// c.) "Fix discontinuities" requires derivatives and drawing one tile with -// border padding to the resized mask FBO, but it works with same-pass -// curvature. It's disabled without _DRIVERS_ALLOW_DERIVATIVES #defined. -// Precedence: a, then, b, then c (if multiple strategies are #defined). -// #ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD -// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD 1 // 129.7 FPS, 4x, flat; 101.8 at fullscreen -// #endif -// #ifndef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 1 // 128.1 FPS, 4x, flat; 101.5 at fullscreen -// #endif -// #ifndef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 1 // 124.4 FPS, 4x, flat; 97.4 at fullscreen -// #endif -// Also, manually resampling the phosphor mask is slightly blurrier with -// anisotropic filtering. (Resampling with mipmapping is even worse: It -// creates artifacts, but only with the fully bloomed shader.) The difference -// is subtle with small triads, but you can fix it for a small cost. -// #ifndef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD -// #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD 0 -// #endif - - -////////////////////////////// DERIVED SETTINGS ////////////////////////////// - -// Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the -// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable -// incompatible settings ASAP. (_INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be -// #defined by either user-settings.h or a wrapper .cg that #includes the -// current .cg pass.) 
-#if _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE - #if _PHOSPHOR_MASK_MANUALLY_RESIZE - #undef _PHOSPHOR_MASK_MANUALLY_RESIZE - #define _PHOSPHOR_MASK_MANUALLY_RESIZE 0 - #endif - #if _RUNTIME_GEOMETRY_MODE - #undef _RUNTIME_GEOMETRY_MODE - #define _RUNTIME_GEOMETRY_MODE 0 - #endif - // Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is - // inferior in most cases, so replace 2.0 with 0.0: - static const float bloom_approx_filter = macro_cond( - bloom_approx_filter_static > 1.5, - 0.0, - bloom_approx_filter_static - ); -#else - static const float bloom_approx_filter = bloom_approx_filter_static; -#endif - -// Disable slow runtime paths if static parameters are used. Most of these -// won't be a problem anyway once the params are disabled, but some will. -#if !_RUNTIME_SHADER_PARAMS_ENABLE - #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA - #undef _RUNTIME_PHOSPHOR_BLOOM_SIGMA - #define _RUNTIME_PHOSPHOR_BLOOM_SIGMA 0 - #endif - #if _RUNTIME_ANTIALIAS_WEIGHTS - #undef _RUNTIME_ANTIALIAS_WEIGHTS - #define _RUNTIME_ANTIALIAS_WEIGHTS 0 - #endif - #if _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - #undef _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - #define _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS 0 - #endif - #if _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #undef _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #define _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE 0 - #endif - #if _RUNTIME_GEOMETRY_TILT - #undef _RUNTIME_GEOMETRY_TILT - #define _RUNTIME_GEOMETRY_TILT 0 - #endif - #if _RUNTIME_GEOMETRY_MODE - #undef _RUNTIME_GEOMETRY_MODE - #define _RUNTIME_GEOMETRY_MODE 0 - #endif - // #if FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - // #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - // #define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT 0 - // #endif -#endif - -// Make tex2Dbias a backup for tex2Dlod for wider compatibility. 
-// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD -// #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -// #endif -// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD -// #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS -// #endif -// Rule out unavailable anisotropic compatibility strategies: -#if !_DRIVERS_ALLOW_DERIVATIVES - // #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - // #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - // #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0 - // #endif -#endif -// #if !_DRIVERS_ALLOW_TEX2DLOD - // #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD - // #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - // #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD 0 - // #endif - // #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - // #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - // #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD 0 - // #endif - // #ifdef ANTIALIAS_DISABLE_ANISOTROPIC - // #undef ANTIALIAS_DISABLE_ANISOTROPIC - // #endif -// #endif -// #if !_DRIVERS_ALLOW_TEX2DBIAS - // #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - // #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - // #endif - // #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - // #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - // #endif -// #endif -// Prioritize anisotropic tiling compatibility strategies by performance and -// disable unused strategies. This concentrates all the nesting in one place. 
-// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD -// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -// #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -// #endif -// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 0 -// #endif -// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0 -// #endif -// #else -// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 0 -// #endif -// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0 -// #endif -// #else -// // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with -// // flat texture coords in the same pass, but that's all we use. 
-// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE -// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0 -// #endif -// #endif -// #endif -// #endif -// The tex2Dlod and tex2Dbias strategies share a lot in common, and we can -// reduce some #ifdef nesting in the next section by essentially OR'ing them: -// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD -// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY -// #endif -// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY -// #endif -// Prioritize anisotropic resampling compatibility strategies the same way: -// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD -// #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS -// #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS -// #endif -// #endif - - -/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS ////////////////////// - -// If we can use the large mipmapped LUT without mipmapping artifacts, we -// should: It gives us more options for using fewer samples. -// #if USE_LARGE_PHOSPHOR_MASK - // #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - // // TODO: Take advantage of this! - // #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT - // static const float2 mask_resize_src_lut_size = mask_texture_large_size; - // #else -static const float2 mask_resize_src_lut_size = mask_texture_large_size; - // #endif -// #else -// static const float2 mask_resize_src_lut_size = mask_texture_small_size; -// #endif - -static const float tile_aspect_inv = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x; - - -// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to -// main_fragment, or a static alias of one of the above. 
This makes it hard -// to select the phosphor mask at runtime: We can't even assign to a uniform -// global in the vertex shader or select a sampler2D in the vertex shader and -// pass it to the fragment shader (even with explicit TEXUNIT# bindings), -// because it just gives us the input texture or a black screen. However, we -// can get around these limitations by calling tex2D three times with different -// uniform samplers (or resizing the phosphor mask three times altogether). -// With dynamic branches, we can process only one of these branches on top of -// quickly discarding fragments we don't need (cgc seems able to overcome -// limigations around dependent texture fetches inside of branches). Without -// dynamic branches, we have to process every branch for every fragment...which -// is slower. Runtime sampling mode selection is slower without dynamic -// branches as well. Let the user's static #defines decide if it's worth it. -#if _DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define _RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT -// #else - // #if FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - // #define _RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - // #endif -#endif - -// We need to render some minimum number of tiles in the resize passes. -// We need at least 1.0 just to repeat a single tile, and we need extra -// padding beyond that for anisotropic filtering, discontinuitity fixing, -// antialiasing, same-pass curvature (not currently used), etc. 
First -// determine how many border texels and tiles we need, based on how the result -// will be sampled: -#ifdef GEOMETRY_EARLY - static const float max_subpixel_offset = aa_subpixel_r_offset_static.x; - // Most antialiasing filters have a base radius of 4.0 pixels: - static const float max_aa_base_pixel_border = 4.0 + - max_subpixel_offset; -#else - static const float max_aa_base_pixel_border = 0.0; -#endif -// Anisotropic filtering adds about 0.5 to the pixel border: -// #ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY -static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5; -// #else -// static const float max_aniso_pixel_border = max_aa_base_pixel_border; -// #endif -// Fixing discontinuities adds 1.0 more to the pixel border: -// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES -// static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0; -// #else - static const float max_tiled_pixel_border = max_aniso_pixel_border; -// #endif -// Convert the pixel border to an integer texel border. Assume same-pass -// curvature about triples the texel frequency: -#ifdef GEOMETRY_EARLY - #define max_mask_texel_border macro_ceil(max_tiled_pixel_border * 3.0f) -#else - #define max_mask_texel_border macro_ceil(max_tiled_pixel_border) -#endif -// Convert the texel border to a tile border using worst-case assumptions: -static const float max_mask_tile_border = max_mask_texel_border/ -(mask_min_allowed_triad_size * mask_triads_per_tile); - -// Finally, set the number of resized tiles to render to MASK_RESIZE, and set -// the starting texel (inside borders) for sampling it. -#ifndef GEOMETRY_EARLY - // #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - // Special case: Render two tiles without borders. Anisotropic - // filtering doesn't seem to be a problem here. 
- // static const float mask_resize_num_tiles = 1.0 + 1.0; - // static const float mask_start_texels = 0.0; - // #else - static const float mask_resize_num_tiles = 1.0 + 2.0 * max_mask_tile_border; - static const float mask_start_texels = max_mask_texel_border; - // #endif -#else - static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border; - static const float mask_start_texels = max_mask_texel_border; -#endif - -// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of -// mask_resize_viewport_scale. This limits the maximum final triad size. -// Estimate the minimum number of triads we can split the screen into in each -// dimension (we'll be as correct as mask_resize_viewport_scale is): -static const float mask_resize_num_triads = mask_resize_num_tiles * mask_triads_per_tile; -static const float2 min_allowed_viewport_triads = -float2(mask_resize_num_triads, mask_resize_num_triads) / mask_resize_viewport_scale; - - - -#endif // _DERIVED_SETTINGS_AND_CONSTANTS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/downsampling-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/downsampling-functions.fxh deleted file mode 100644 index 7e8404994..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/downsampling-functions.fxh +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef _DOWNSAMPLING_FUNCTIONS_H -#define _DOWNSAMPLING_FUNCTIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, 
subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -float3 opaque_linear_downsample( - const sampler2D tex, - const float2 texcoord, - const uint num_pairs, - const float2 delta_uv -) { - const uint total_num_samples = num_pairs * 2 + 1; - const float2 coord_left = texcoord - delta_uv * num_pairs; - - float3 acc = 0; - for(int i = 0; i < total_num_samples; i++) { - const float2 coord = coord_left + i * delta_uv; - acc += tex2D_nograd(tex, coord).rgb; - } - - return acc / total_num_samples; -} - - -float3 opaque_lanczos_downsample( - const sampler2D tex, - const float2 texcoord, - const uint num_pairs, - const float2 delta_uv, - const float num_sinc_lobes, - const float weight_at_center -) { - const uint total_num_samples = num_pairs * 2 + 1; - const float2 coord_left = texcoord - delta_uv * num_pairs; - const float sinc_dx = num_sinc_lobes / num_pairs; // 2 * num_sinc_lobes / (total_num_samples - 1) - - float3 acc = 0; - float w_sum = 0; - for(int i = 0; i < total_num_samples; i++) { - const float2 coord = coord_left + i * delta_uv; - const float sinc_x = i * sinc_dx; - - const float weight = (i != num_pairs) ? 
- num_sinc_lobes * sin(pi*sinc_x) * sin(pi*sinc_x/num_sinc_lobes) / (pi*pi * sinc_x*sinc_x) : - weight_at_center; - - acc += weight * tex2D_nograd(tex, coord).rgb; - w_sum += weight; - } - - return acc / w_sum; -} - -float3 opaque_lanczos_downsample( - const sampler2D tex, - const float2 texcoord, - const uint num_pairs, - const float2 delta_uv, - const float num_sinc_lobes -) { - return opaque_lanczos_downsample(tex, texcoord, num_pairs, delta_uv, num_sinc_lobes, 1); -} - -#endif // _DOWNSAMPLING_FUNCTIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/gamma-management.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/gamma-management.fxh deleted file mode 100644 index a0ce35ff0..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/gamma-management.fxh +++ /dev/null @@ -1,225 +0,0 @@ -#ifndef _GAMMA_MANAGEMENT_H -#define _GAMMA_MANAGEMENT_H - - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -#include "helper-functions-and-macros.fxh" - - -/////////////////////////////// BASE CONSTANTS /////////////////////////////// - -// Set standard gamma constants, but allow users to override them: -#ifndef OVERRIDE_STANDARD_GAMMA - // Standard encoding gammas: - static const float ntsc_gamma = 2.2; // Best to use NTSC for PAL too? - static const float pal_gamma = 2.8; // Never actually 2.8 in practice - // Typical device decoding gammas (only use for emulating devices): - // CRT/LCD reference gammas are higher than NTSC and Rec.709 video standard - // gammas: The standards purposely undercorrected for an analog CRT's - // assumed 2.5 reference display gamma to maintain contrast in assumed - // [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf - // These unstated assumptions about display gamma and perceptual rendering - // intent caused a lot of confusion, and more modern CRT's seemed to target - // NTSC 2.2 gamma with circuitry. LCD displays seem to have followed suit - // (they struggle near black with 2.5 gamma anyway), especially PC/laptop - // displays designed to view sRGB in bright environments. (Standards are - // also in flux again with BT.1886, but it's underspecified for displays.) 
- static const float crt_reference_gamma_high = 2.5; // In (2.35, 2.55) - static const float crt_reference_gamma_low = 2.35; // In (2.35, 2.55) - static const float lcd_reference_gamma = 2.5; // To match CRT - static const float crt_office_gamma = 2.2; // Circuitry-adjusted for NTSC - static const float lcd_office_gamma = 2.2; // Approximates sRGB -#endif // OVERRIDE_STANDARD_GAMMA - -// Assuming alpha == 1.0 might make it easier for users to avoid some bugs, -// but only if they're aware of it. -#ifndef OVERRIDE_ALPHA_ASSUMPTIONS - static const bool assume_opaque_alpha = false; -#endif - - -/////////////////////// DERIVED CONSTANTS AS FUNCTIONS /////////////////////// - -// gamma-management.h should be compatible with overriding gamma values with -// runtime user parameters, but we can only define other global constants in -// terms of static constants, not uniform user parameters. To get around this -// limitation, we need to define derived constants using functions. - -// Set device gamma constants, but allow users to override them: -#if _OVERRIDE_DEVICE_GAMMA - // The user promises to globally define the appropriate constants: - float get_crt_gamma() { return crt_gamma; } - float get_gba_gamma() { return gba_gamma; } - float get_lcd_gamma() { return lcd_gamma; } -#else - float get_crt_gamma() { return crt_reference_gamma_high; } - float get_gba_gamma() { return 3.5; } // Game Boy Advance; in (3.0, 4.0) - float get_lcd_gamma() { return lcd_office_gamma; } -#endif // _OVERRIDE_DEVICE_GAMMA - -// Set decoding/encoding gammas for the first/lass passes, but allow overrides: -#ifdef OVERRIDE_FINAL_GAMMA - // The user promises to globally define the appropriate constants: - float get_intermediate_gamma() { return intermediate_gamma; } - float get_input_gamma() { return input_gamma; } - float get_output_gamma() { return output_gamma; } -#else - // If we gamma-correct every pass, always use ntsc_gamma between passes to - // ensure middle passes don't need to care if 
anything is being simulated: - - // TODO: Figure out the correct way to configure this now that intermediate - // FBOs all use get_intermediate_gamma() directly. Also refer to the - // original code to confirm when a shader uses ntsc_gamma despite - // GAMMA_ENCODE_EVERY_FBO being undefined. - // float get_intermediate_gamma() { return ntsc_gamma; } - float get_intermediate_gamma() { return 1.0; } - - #if GAMMA_SIMULATION_MODE == _SIMULATE_CRT_ON_LCD - float get_input_gamma() { return get_crt_gamma(); } - float get_output_gamma() { return get_lcd_gamma(); } - #else - #if GAMMA_SIMULATION_MODE == _SIMULATE_GBA_ON_LCD - float get_input_gamma() { return get_gba_gamma(); } - float get_output_gamma() { return get_lcd_gamma(); } - #else - #if GAMMA_SIMULATION_MODE == _SIMULATE_LCD_ON_CRT - float get_input_gamma() { return get_lcd_gamma(); } - float get_output_gamma() { return get_crt_gamma(); } - #else - #if GAMMA_SIMULATION_MODE == _SIMULATE_GBA_ON_CRT - float get_input_gamma() { return get_gba_gamma(); } - float get_output_gamma() { return get_crt_gamma(); } - #else // Don't simulate anything: - float get_input_gamma() { return ntsc_gamma; } - float get_output_gamma() { return ntsc_gamma; } - #endif // _SIMULATE_GBA_ON_CRT - #endif // _SIMULATE_LCD_ON_CRT - #endif // _SIMULATE_GBA_ON_LCD - #endif // _SIMULATE_CRT_ON_LCD -#endif // OVERRIDE_FINAL_GAMMA - - -// Set decoding/encoding gammas for the current pass. Use static constants for -// linearize_input and gamma_encode_output, because they aren't derived, and -// they let the compiler do dead-code elimination. 
-// #ifndef GAMMA_ENCODE_EVERY_FBO -// #ifdef FIRST_PASS -// static const bool linearize_input = true; -// float get_pass_input_gamma() { return get_input_gamma(); } -// #else -// static const bool linearize_input = false; -// float get_pass_input_gamma() { return 1.0; } -// #endif -// #ifdef LAST_PASS -// static const bool gamma_encode_output = true; -// float get_pass_output_gamma() { return get_output_gamma(); } -// #else -// static const bool gamma_encode_output = false; -// float get_pass_output_gamma() { return 1.0; } -// #endif -// #else -// static const bool linearize_input = true; -// static const bool gamma_encode_output = true; -// #ifdef FIRST_PASS -// float get_pass_input_gamma() { return get_input_gamma(); } -// #else -// float get_pass_input_gamma() { return get_intermediate_gamma(); } -// #endif -// #ifdef LAST_PASS -// float get_pass_output_gamma() { return get_output_gamma(); } -// #else -// float get_pass_output_gamma() { return get_intermediate_gamma(); } -// #endif -// #endif - -// Users might want to know if bilinear filtering will be gamma-correct: -// static const bool gamma_aware_bilinear = !linearize_input; - - -////////////////////// COLOR ENCODING/DECODING FUNCTIONS ///////////////////// - -float4 encode_output_opaque(const float4 color, const float gamma) -{ - static const float3 g = 1.0 / float3(gamma, gamma, gamma); - return float4(pow(color.rgb, g), 1); -} - -float4 decode_input_opaque(const float4 color, const float gamma) -{ - static const float3 g = float3(gamma, gamma, gamma); - return float4(pow(color.rgb, g), 1); -} - -float4 encode_output(const float4 color, const float gamma) -{ - static const float3 g = 1.0 / float3(gamma, gamma, gamma); - return float4(pow(color.rgb, g), color.a); -} - -float4 decode_input(const float4 color, const float gamma) -{ - static const float3 g = float3(gamma, gamma, gamma); - return float4(pow(color.rgb, g), color.a); -} - -/////////////////////////// TEXTURE LOOKUP WRAPPERS 
////////////////////////// - -// "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS: -// Provide a wide array of linearizing texture lookup wrapper functions. The -// Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D -// lookups are provided for completeness in case that changes someday. Nobody -// is likely to use the *fetch and *proj functions, but they're included just -// in case. The only tex*D texture sampling functions omitted are: -// - tex*Dcmpbias -// - tex*Dcmplod -// - tex*DARRAY* -// - tex*DMS* -// - Variants returning integers -// Standard line length restrictions are ignored below for vertical brevity. - -// tex2D: -float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float gamma) -{ return decode_input(tex2D(tex, tex_coords), gamma); } - -float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float gamma) -{ return decode_input(tex2D(tex, tex_coords.xy), gamma); } - -// float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off, const float gamma) -// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); } - -// float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off, const float gamma) -// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); } - -// tex2Dlod: -float4 tex2Dlod_linearize(const sampler2D tex, const float2 tex_coords, const float gamma) -{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0), gamma); } - -float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const float gamma) -{ return decode_input(tex2Dlod(tex, float4(tex_coords.xy, 0, 0), 0.0), gamma); } - -// float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off, const float gamma) -// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); } - -#endif // 
_GAMMA_MANAGEMENT_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/geometry-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/geometry-functions.fxh deleted file mode 100644 index c16d03f9a..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/geometry-functions.fxh +++ /dev/null @@ -1,715 +0,0 @@ -#ifndef _GEOMETRY_FUNCTIONS_H -#define _GEOMETRY_FUNCTIONS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" -#include "bind-shader-params.fxh" - - -//////////////////////////// MACROS AND CONSTANTS //////////////////////////// - -// Curvature-related constants: -#define MAX_POINT_CLOUD_SIZE 9 - - -///////////////////////////// CURVATURE FUNCTIONS ///////////////////////////// - -float2 quadratic_solve(const float a, const float b_over_2, const float c) -{ - // Requires: 1.) a, b, and c are quadratic formula coefficients - // 2.) b_over_2 = b/2.0 (simplifies terms to factor 2 out) - // 3.) 
b_over_2 must be guaranteed < 0.0 (avoids a branch) - // Returns: Returns float2(first_solution, discriminant), so the caller - // can choose how to handle the "no intersection" case. The - // Kahan or Citardauq formula is used for numerical robustness. - const float discriminant = b_over_2*b_over_2 - a*c; - const float solution0 = c/(-b_over_2 + sqrt(discriminant)); - return float2(solution0, discriminant); -} - -float2 intersect_sphere(const float3 view_vec, const float3 eye_pos_vec) -{ - // Requires: 1.) view_vec and eye_pos_vec are 3D vectors in the sphere's - // local coordinate frame (eye_pos_vec is a position, i.e. - // a vector from the origin to the eye/camera) - // 2.) geom_radius is a global containing the sphere's radius - // Returns: Cast a ray of direction view_vec from eye_pos_vec at a - // sphere of radius geom_radius, and return the distance to - // the first intersection in units of length(view_vec). - // http://wiki.cgsociety.org/index.php/Ray_Sphere_Intersection - // Quadratic formula coefficients (b_over_2 is guaranteed negative): - const float a = dot(view_vec, view_vec); - const float b_over_2 = dot(view_vec, eye_pos_vec); // * 2.0 factored out - const float c = dot(eye_pos_vec, eye_pos_vec) - geom_radius*geom_radius; - return quadratic_solve(a, b_over_2, c); -} - -float2 intersect_cylinder(const float3 view_vec, const float3 eye_pos_vec) -{ - // Requires: 1.) view_vec and eye_pos_vec are 3D vectors in the sphere's - // local coordinate frame (eye_pos_vec is a position, i.e. - // a vector from the origin to the eye/camera) - // 2.) geom_radius is a global containing the cylinder's radius - // Returns: Cast a ray of direction view_vec from eye_pos_vec at a - // cylinder of radius geom_radius, and return the distance to - // the first intersection in units of length(view_vec). The - // derivation of the coefficients is in Christer Ericson's - // Real-Time Collision Detection, p. 
195-196, and this version - // uses LaGrange's identity to reduce operations. - // Arbitrary "cylinder top" reference point for an infinite cylinder: - const float3 cylinder_top_vec = float3(0.0, geom_radius, 0.0); - const float3 cylinder_axis_vec = float3(0.0, 1.0, 0.0);//float3(0.0, 2.0*geom_radius, 0.0); - const float3 top_to_eye_vec = eye_pos_vec - cylinder_top_vec; - const float3 axis_x_view = cross(cylinder_axis_vec, view_vec); - const float3 axis_x_top_to_eye = cross(cylinder_axis_vec, top_to_eye_vec); - // Quadratic formula coefficients (b_over_2 is guaranteed negative): - const float a = dot(axis_x_view, axis_x_view); - const float b_over_2 = dot(axis_x_top_to_eye, axis_x_view); - const float c = dot(axis_x_top_to_eye, axis_x_top_to_eye) - - geom_radius*geom_radius;//*dot(cylinder_axis_vec, cylinder_axis_vec); - return quadratic_solve(a, b_over_2, c); -} - -float2 cylinder_xyz_to_uv(const float3 intersection_pos_local, - const float2 geom_aspect) -{ - // Requires: An xyz intersection position on a cylinder. - // Returns: video_uv coords mapped to range [-0.5, 0.5] - // Mapping: Define square_uv.x to be the signed arc length in xz-space, - // and define square_uv.y = -intersection_pos_local.y (+v = -y). - // Start with a numerically robust arc length calculation. - const float angle_from_image_center = atan2(intersection_pos_local.x, - intersection_pos_local.z); - const float signed_arc_len = angle_from_image_center * geom_radius; - // Get a uv-mapping where [-0.5, 0.5] maps to a "square" area, then divide - // by the aspect ratio to stretch the mapping appropriately: - const float2 square_uv = float2(signed_arc_len, -intersection_pos_local.y); - const float2 video_uv = square_uv / geom_aspect; - return video_uv; -} - -float3 cylinder_uv_to_xyz(const float2 video_uv, const float2 geom_aspect) -{ - // Requires: video_uv coords mapped to range [-0.5, 0.5] - // Returns: An xyz intersection position on a cylinder. 
This is the - // inverse of cylinder_xyz_to_uv(). - // Expand video_uv by the aspect ratio to get proportionate x/y lengths, - // then calculate an xyz position for the cylindrical mapping above. - const float2 square_uv = video_uv * geom_aspect; - const float arc_len = square_uv.x; - const float angle_from_image_center = arc_len / geom_radius; - const float x_pos = sin(angle_from_image_center) * geom_radius; - const float z_pos = cos(angle_from_image_center) * geom_radius; - // Or: z = sqrt(geom_radius**2 - x**2) - // Or: z = geom_radius/sqrt(1.0 + tan(angle)**2), x = z * tan(angle) - const float3 intersection_pos_local = float3(x_pos, -square_uv.y, z_pos); - return intersection_pos_local; -} - -float2 sphere_xyz_to_uv(const float3 intersection_pos_local, - const float2 geom_aspect) -{ - // Requires: An xyz intersection position on a sphere. - // Returns: video_uv coords mapped to range [-0.5, 0.5] - // Mapping: First define square_uv.x/square_uv.y == - // intersection_pos_local.x/intersection_pos_local.y. Then, - // length(square_uv) is the arc length from the image center - // at (0.0, 0.0, geom_radius) along the tangent great circle. - // Credit for this mapping goes to cgwg: I never managed to - // understand his code, but he told me his mapping was based on - // great circle distances when I asked him about it, which - // informed this very similar (almost identical) mapping. 
- // Start with a numerically robust arc length calculation between the ray- - // sphere intersection point and the image center using a method posted by - // Roger Stafford on comp.soft-sys.matlab: - // https://groups.google.com/d/msg/comp.soft-sys.matlab/zNbUui3bjcA/c0HV_bHSx9cJ - const float3 image_center_pos_local = float3(0.0, 0.0, geom_radius); - const float cp_len = - length(cross(intersection_pos_local, image_center_pos_local)); - const float dp = dot(intersection_pos_local, image_center_pos_local); - const float angle_from_image_center = atan2(cp_len, dp); - const float arc_len = angle_from_image_center * geom_radius; - // Get a uv-mapping where [-0.5, 0.5] maps to a "square" area, then divide - // by the aspect ratio to stretch the mapping appropriately: - const float2 square_uv_unit = normalize(float2(intersection_pos_local.x, - -intersection_pos_local.y)); - const float2 square_uv = arc_len * square_uv_unit; - const float2 video_uv = square_uv / geom_aspect; - return video_uv; -} - -float3 sphere_uv_to_xyz(const float2 video_uv, const float2 geom_aspect) -{ - // Requires: video_uv coords mapped to range [-0.5, 0.5] - // Returns: An xyz intersection position on a sphere. This is the - // inverse of sphere_xyz_to_uv(). - // Expand video_uv by the aspect ratio to get proportionate x/y lengths, - // then calculate an xyz position for the spherical mapping above. - if (video_uv.x != 0 && video_uv.y != 0) { - const float2 square_uv = video_uv * geom_aspect; - // Using length or sqrt here butchers the framerate on my 8800GTS if - // this function is called too many times, and so does taking the max - // component of square_uv/square_uv_unit (program length threshold?). 
- //float arc_len = length(square_uv); - const float2 square_uv_unit = normalize(square_uv); - const float arc_len = square_uv.y/square_uv_unit.y; - const float angle_from_image_center = arc_len / geom_radius; - const float xy_dist_from_sphere_center = - sin(angle_from_image_center) * geom_radius; - //float2 xy_pos = xy_dist_from_sphere_center * (square_uv/FIX_ZERO(arc_len)); - const float2 xy_pos = xy_dist_from_sphere_center * square_uv_unit; - const float z_pos = cos(angle_from_image_center) * geom_radius; - const float3 intersection_pos_local = float3(xy_pos.x, -xy_pos.y, z_pos); - return intersection_pos_local; - } - else if (video_uv.x != 0) { - const float2 square_uv = video_uv * geom_aspect; - // Using length or sqrt here butchers the framerate on my 8800GTS if - // this function is called too many times, and so does taking the max - // component of square_uv/square_uv_unit (program length threshold?). - //float arc_len = length(square_uv); - const float2 square_uv_unit = normalize(square_uv); - const float angle_from_image_center = 0; - const float xy_dist_from_sphere_center = sin(angle_from_image_center) * geom_radius; - const float2 xy_pos = xy_dist_from_sphere_center * square_uv_unit; - const float z_pos = cos(angle_from_image_center) * geom_radius; - const float3 intersection_pos_local = float3(xy_pos.x, -xy_pos.y, z_pos); - return intersection_pos_local; - } - else { - const float2 xy_pos = float2(0, 0); - const float z_pos = geom_radius; - const float3 intersection_pos_local = float3(xy_pos.x, -xy_pos.y, z_pos); - return intersection_pos_local; - } -} - -float2 sphere_alt_xyz_to_uv(const float3 intersection_pos_local, - const float2 geom_aspect) -{ - // Requires: An xyz intersection position on a cylinder. - // Returns: video_uv coords mapped to range [-0.5, 0.5] - // Mapping: Define square_uv.x to be the signed arc length in xz-space, - // and define square_uv.y == signed arc length in yz-space. 
- // See cylinder_xyz_to_uv() for implementation details (very similar). - const float2 angle_from_image_center = atan2( - float2(intersection_pos_local.x, -intersection_pos_local.y), - intersection_pos_local.zz); - const float2 signed_arc_len = angle_from_image_center * geom_radius; - const float2 video_uv = signed_arc_len / geom_aspect; - return video_uv; -} - -float3 sphere_alt_uv_to_xyz(const float2 video_uv, const float2 geom_aspect) -{ - // Requires: video_uv coords mapped to range [-0.5, 0.5] - // Returns: An xyz intersection position on a sphere. This is the - // inverse of sphere_alt_xyz_to_uv(). - // See cylinder_uv_to_xyz() for implementation details (very similar). - const float2 square_uv = video_uv * geom_aspect; - const float2 arc_len = square_uv; - const float2 angle_from_image_center = arc_len / geom_radius; - const float2 xy_pos = sin(angle_from_image_center) * geom_radius; - const float z_pos = sqrt(geom_radius*geom_radius - dot(xy_pos, xy_pos)); - return float3(xy_pos.x, -xy_pos.y, z_pos); -} - -float2 intersect(const float3 view_vec_local, const float3 eye_pos_local, - const float geom_mode) -{ - return geom_mode < 2.5 ? intersect_sphere(view_vec_local, eye_pos_local) : - intersect_cylinder(view_vec_local, eye_pos_local); -} - -float2 xyz_to_uv(const float3 intersection_pos_local, - const float2 geom_aspect, const float geom_mode) -{ - return geom_mode < 1.5 ? - sphere_xyz_to_uv(intersection_pos_local, geom_aspect) : - geom_mode < 2.5 ? - sphere_alt_xyz_to_uv(intersection_pos_local, geom_aspect) : - cylinder_xyz_to_uv(intersection_pos_local, geom_aspect); -} - -float3 uv_to_xyz(const float2 uv, const float2 geom_aspect, - const float geom_mode) -{ - return geom_mode < 1.5 ? sphere_uv_to_xyz(uv, geom_aspect) : - geom_mode < 2.5 ? 
sphere_alt_uv_to_xyz(uv, geom_aspect) : - cylinder_uv_to_xyz(uv, geom_aspect); -} - -float2 view_vec_to_uv(const float3 view_vec_local, const float3 eye_pos_local, - const float2 geom_aspect, const float geom_mode, out float3 intersection_pos) -{ - // Get the intersection point on the primitive, given an eye position - // and view vector already in its local coordinate frame: - const float2 intersect_dist_and_discriminant = intersect(view_vec_local, - eye_pos_local, geom_mode); - const float3 intersection_pos_local = eye_pos_local + - view_vec_local * intersect_dist_and_discriminant.x; - // Save the intersection position to an output parameter: - intersection_pos = intersection_pos_local; - // Transform into uv coords, but give out-of-range coords if the - // view ray doesn't intersect the primitive in the first place: - return intersect_dist_and_discriminant.y > 0.005 ? - xyz_to_uv(intersection_pos_local, geom_aspect, geom_mode) : float2(1.0, 1.0); -} - -float3 get_ideal_global_eye_pos_for_points(float3 eye_pos, - const float2 geom_aspect, const float3 global_coords[MAX_POINT_CLOUD_SIZE], - const int num_points) -{ - // Requires: Parameters: - // 1.) Starting eye_pos is a global 3D position at which the - // camera contains all points in global_coords[] in its FOV - // 2.) geom_aspect = get_aspect_vector( - // IN.output_size.x / IN.output_size.y); - // 3.) global_coords is a point cloud containing global xyz - // coords of extreme points on the simulated CRT screen. - // Globals: - // 1.) geom_view_dist must be > 0.0. It controls the "near - // plane" used to interpret flat_video_uv as a view - // vector, which controls the field of view (FOV). - // Eyespace coordinate frame: +x = right, +y = up, +z = back - // Returns: Return an eye position at which the point cloud spans as - // much of the screen as possible (given the FOV controlled by - // geom_view_dist) without being cropped or sheared. - // Algorithm: - // 1.) 
Move the eye laterally to a point which attempts to maximize the - // the amount we can move forward without clipping the CRT screen. - // 2.) Move forward by as much as possible without clipping the CRT. - // Get the allowed movement range by solving for the eye_pos offsets - // that result in each point being projected to a screen edge/corner in - // pseudo-normalized device coords (where xy ranges from [-0.5, 0.5] - // and z = eyespace z): - // pndc_coord = float3(float2(eyespace_xyz.x, -eyespace_xyz.y)* - // geom_view_dist / (geom_aspect * -eyespace_xyz.z), eyespace_xyz.z); - // Notes: - // The field of view is controlled by geom_view_dist's magnitude relative to - // the view vector's x and y components: - // view_vec.xy ranges from [-0.5, 0.5] * geom_aspect - // view_vec.z = -geom_view_dist - // But for the purposes of perspective divide, it should be considered: - // view_vec.xy ranges from [-0.5, 0.5] * geom_aspect / geom_view_dist - // view_vec.z = -1.0 - static const int max_centering_iters = 1; // Keep for easy testing. - for(int iter = 0; iter < max_centering_iters; iter++) - { - // 0.) Get the eyespace coordinates of our point cloud: - float3 eyespace_coords[MAX_POINT_CLOUD_SIZE]; - for(int i = 0; i < num_points; i++) - { - eyespace_coords[i] = global_coords[i] - eye_pos; - } - // 1a.)For each point, find out how far we can move eye_pos in each - // lateral direction without the point clipping the frustum. - // Eyespace +y = up, screenspace +y = down, so flip y after - // applying the eyespace offset (on the way to "clip space"). - // Solve for two offsets per point based on: - // (eyespace_xyz.xy - offset_dr) * float2(1.0, -1.0) * - // geom_view_dist / (geom_aspect * -eyespace_xyz.z) = float2(-0.5) - // (eyespace_xyz.xy - offset_dr) * float2(1.0, -1.0) * - // geom_view_dist / (geom_aspect * -eyespace_xyz.z) = float2(0.5) - // offset_ul and offset_dr represent the farthest we can move the - // eye_pos up-left and down-right. 
Save the min of all offset_dr's - // and the max of all offset_ul's (since it's negative). - float abs_radius = abs(geom_radius); // In case anyone gets ideas. ;) - float2 offset_dr_min = float2(10.0 * abs_radius, 10.0 * abs_radius); - float2 offset_ul_max = float2(-10.0 * abs_radius, -10.0 * abs_radius); - for(int i = 0; i < num_points; i++) - { - static const float2 flipy = float2(1.0, -1.0); - float3 eyespace_xyz = eyespace_coords[i]; - float2 offset_dr = eyespace_xyz.xy - float2(-0.5, -0.5) * - (geom_aspect * -eyespace_xyz.z) / (geom_view_dist * flipy); - float2 offset_ul = eyespace_xyz.xy - float2(0.5, 0.5) * - (geom_aspect * -eyespace_xyz.z) / (geom_view_dist * flipy); - offset_dr_min = min(offset_dr_min, offset_dr); - offset_ul_max = max(offset_ul_max, offset_ul); - } - // 1b.)Update eye_pos: Adding the average of offset_ul_max and - // offset_dr_min gives it equal leeway on the top vs. bottom - // and left vs. right. Recalculate eyespace_coords accordingly. - float2 center_offset = 0.5 * (offset_ul_max + offset_dr_min); - eye_pos.xy += center_offset; - for(int i = 0; i < num_points; i++) - { - eyespace_coords[i] = global_coords[i] - eye_pos; - } - // 2a.)For each point, find out how far we can move eye_pos forward - // without the point clipping the frustum. Flip the y - // direction in advance (matters for a later step, not here). - // Solve for four offsets per point based on: - // eyespace_xyz_flipy.x * geom_view_dist / - // (geom_aspect.x * (offset_z - eyespace_xyz_flipy.z)) =-0.5 - // eyespace_xyz_flipy.y * geom_view_dist / - // (geom_aspect.y * (offset_z - eyespace_xyz_flipy.z)) =-0.5 - // eyespace_xyz_flipy.x * geom_view_dist / - // (geom_aspect.x * (offset_z - eyespace_xyz_flipy.z)) = 0.5 - // eyespace_xyz_flipy.y * geom_view_dist / - // (geom_aspect.y * (offset_z - eyespace_xyz_flipy.z)) = 0.5 - // We'll vectorize the actual computation. 
Take the maximum of - // these four for a single offset, and continue taking the max - // for every point (use max because offset.z is negative). - float offset_z_max = -10.0 * geom_radius * geom_view_dist; - for(int i = 0; i < num_points; i++) - { - float3 eyespace_xyz_flipy = eyespace_coords[i] * - float3(1.0, -1.0, 1.0); - float4 offset_zzzz = eyespace_xyz_flipy.zzzz + - (eyespace_xyz_flipy.xyxy * geom_view_dist) / - (float4(-0.5, -0.5, 0.5, 0.5) * float4(geom_aspect, geom_aspect)); - // Ignore offsets that push positive x/y values to opposite - // boundaries, and vice versa, and don't let the camera move - // past a point in the dead center of the screen: - offset_z_max = (eyespace_xyz_flipy.x < 0.0) ? - max(offset_z_max, offset_zzzz.x) : offset_z_max; - offset_z_max = (eyespace_xyz_flipy.y < 0.0) ? - max(offset_z_max, offset_zzzz.y) : offset_z_max; - offset_z_max = (eyespace_xyz_flipy.x > 0.0) ? - max(offset_z_max, offset_zzzz.z) : offset_z_max; - offset_z_max = (eyespace_xyz_flipy.y > 0.0) ? - max(offset_z_max, offset_zzzz.w) : offset_z_max; - offset_z_max = max(offset_z_max, eyespace_xyz_flipy.z); - } - // 2b.)Update eye_pos: Add the maximum (smallest negative) z offset. - eye_pos.z += offset_z_max; - } - return eye_pos; -} - -float3 get_ideal_global_eye_pos(const float3x3 local_to_global, - const float2 geom_aspect, const float geom_mode) -{ - // Start with an initial eye_pos that includes the entire primitive - // (sphere or cylinder) in its field-of-view: - const float3 high_view = float3(0.0, geom_aspect.y, -geom_view_dist); - const float3 low_view = high_view * float3(1.0, -1.0, 1.0); - const float len_sq = dot(high_view, high_view); - const float fov = abs(acos(dot(high_view, low_view)/len_sq)); - // Trigonometry/similar triangles say distance = geom_radius/sin(fov/2): - const float eye_z_spherical = geom_radius/sin(fov*0.5); - const float3 eye_pos = geom_mode < 2.5 ? 
- float3(0.0, 0.0, eye_z_spherical) : - float3(0.0, 0.0, max(geom_view_dist, eye_z_spherical)); - - // Get global xyz coords of extreme sample points on the simulated CRT - // screen. Start with the center, edge centers, and corners of the - // video image. We can't ignore backfacing points: They're occluded - // by closer points on the primitive, but they may NOT be occluded by - // the convex hull of the remaining samples (i.e. the remaining convex - // hull might not envelope points that do occlude a back-facing point.) - static const int num_points = MAX_POINT_CLOUD_SIZE; - float3 global_coords[MAX_POINT_CLOUD_SIZE]; - global_coords[0] = mul(local_to_global, uv_to_xyz(float2(0.0, 0.0), geom_aspect, geom_mode)); - global_coords[1] = mul(local_to_global, uv_to_xyz(float2(0.0, -0.5), geom_aspect, geom_mode)); - global_coords[2] = mul(local_to_global, uv_to_xyz(float2(0.0, 0.5), geom_aspect, geom_mode)); - global_coords[3] = mul(local_to_global, uv_to_xyz(float2(-0.5, 0.0), geom_aspect, geom_mode)); - global_coords[4] = mul(local_to_global, uv_to_xyz(float2(0.5, 0.0), geom_aspect, geom_mode)); - global_coords[5] = mul(local_to_global, uv_to_xyz(float2(-0.5, -0.5), geom_aspect, geom_mode)); - global_coords[6] = mul(local_to_global, uv_to_xyz(float2(0.5, -0.5), geom_aspect, geom_mode)); - global_coords[7] = mul(local_to_global, uv_to_xyz(float2(-0.5, 0.5), geom_aspect, geom_mode)); - global_coords[8] = mul(local_to_global, uv_to_xyz(float2(0.5, 0.5), geom_aspect, geom_mode)); - // Adding more inner image points could help in extreme cases, but too many - // points will kille the framerate. For safety, default to the initial - // eye_pos if any z coords are negative: - float num_negative_z_coords = 0.0; - for(int i = 0; i < num_points; i++) - { - num_negative_z_coords += float(global_coords[0].z < 0.0); - } - // Outsource the optimized eye_pos calculation: - return num_negative_z_coords > 0.5 ? 
eye_pos : - get_ideal_global_eye_pos_for_points(eye_pos, geom_aspect, - global_coords, num_points); -} - -float3x3 get_pixel_to_object_matrix(const float3x3 global_to_local, - const float3 eye_pos_local, const float3 view_vec_global, - const float3 intersection_pos_local, const float3 normal, - const float2 output_size_inv) -{ - // Requires: See get_curved_video_uv_coords_and_tangent_matrix for - // descriptions of each parameter. - // Returns: Return a transformation matrix from 2D pixel-space vectors - // (where (+1.0, +1.0) is a vector to one pixel down-right, - // i.e. same directionality as uv texels) to 3D object-space - // vectors in the CRT's local coordinate frame (right-handed) - // ***which are tangent to the CRT surface at the intersection - // position.*** (Basically, we want to convert pixel-space - // vectors to 3D vectors along the CRT's surface, for later - // conversion to uv vectors.) - // Shorthand inputs: - const float3 pos = intersection_pos_local; - const float3 eye_pos = eye_pos_local; - // Get a piecewise-linear matrix transforming from "pixelspace" offset - // vectors (1.0 = one pixel) to object space vectors in the tangent - // plane (faster than finding 3 view-object intersections). - // 1.) Get the local view vecs for the pixels to the right and down: - const float3 view_vec_right_global = view_vec_global + - float3(output_size_inv.x, 0.0, 0.0); - const float3 view_vec_down_global = view_vec_global + - float3(0.0, -output_size_inv.y, 0.0); - const float3 view_vec_right_local = - mul(global_to_local, view_vec_right_global); - const float3 view_vec_down_local = - mul(global_to_local, view_vec_down_global); - // 2.) 
Using the true intersection point, intersect the neighboring - // view vectors with the tangent plane: - const float3 intersection_vec_dot_normal = float3(dot(pos - eye_pos, normal), dot(pos - eye_pos, normal), dot(pos - eye_pos, normal)); - const float3 right_pos = eye_pos + (intersection_vec_dot_normal / - dot(view_vec_right_local, normal))*view_vec_right_local; - const float3 down_pos = eye_pos + (intersection_vec_dot_normal / - dot(view_vec_down_local, normal))*view_vec_down_local; - // 3.) Subtract the original intersection pos from its neighbors; the - // resulting vectors are object-space vectors tangent to the plane. - // These vectors are the object-space transformations of (1.0, 0.0) - // and (0.0, 1.0) pixel offsets, so they form the first two basis - // vectors of a pixelspace to object space transformation. This - // transformation is 2D to 3D, so use (0, 0, 0) for the third vector. - const float3 object_right_vec = right_pos - pos; - const float3 object_down_vec = down_pos - pos; - const float3x3 pixel_to_object = float3x3( - object_right_vec.x, object_down_vec.x, 0.0, - object_right_vec.y, object_down_vec.y, 0.0, - object_right_vec.z, object_down_vec.z, 0.0); - return pixel_to_object; -} - -float3x3 get_object_to_tangent_matrix(const float3 intersection_pos_local, - const float3 normal, const float2 geom_aspect, const float geom_mode) -{ - // Requires: See get_curved_video_uv_coords_and_tangent_matrix for - // descriptions of each parameter. - // Returns: Return a transformation matrix from 3D object-space vectors - // in the CRT's local coordinate frame (right-handed, +y = up) - // to 2D video_uv vectors (+v = down). - // Description: - // The TBN matrix formed by the [tangent, bitangent, normal] basis - // vectors transforms ordinary vectors from tangent->object space. - // The cotangent matrix formed by the [cotangent, cobitangent, normal] - // basis vectors transforms normal vectors (covectors) from - // tangent->object space. 
It's the inverse-transpose of the TBN matrix. - // We want the inverse of the TBN matrix (transpose of the cotangent - // matrix), which transforms ordinary vectors from object->tangent space. - // Start by calculating the relevant basis vectors in accordance with - // Christian Schüler's blog post "Followup: Normal Mapping Without - // Precomputed Tangents": http://www.thetenthplanet.de/archives/1180 - // With our particular uv mapping, the scale of the u and v directions - // is determined entirely by the aspect ratio for cylindrical and ordinary - // spherical mappings, and so tangent and bitangent lengths are also - // determined by it (the alternate mapping is more complex). Therefore, we - // must ensure appropriate cotangent and cobitangent lengths as well. - // Base these off the uv<=>xyz mappings for each primitive. - const float3 pos = intersection_pos_local; - static const float3 x_vec = float3(1.0, 0.0, 0.0); - static const float3 y_vec = float3(0.0, 1.0, 0.0); - // The tangent and bitangent vectors correspond with increasing u and v, - // respectively. Mathematically we'd base the cotangent/cobitangent on - // those, but we'll compute the cotangent/cobitangent directly when we can. - float3 cotangent_unscaled, cobitangent_unscaled; - // geom_mode should be constant-folded without _RUNTIME_GEOMETRY_MODE. 
- if(geom_mode < 1.5) - { - // Sphere: - // tangent = normalize(cross(normal, cross(x_vec, pos))) * geom_aspect.x - // bitangent = normalize(cross(cross(y_vec, pos), normal)) * geom_aspect.y - // inv_determinant = 1.0/length(cross(bitangent, tangent)) - // cotangent = cross(normal, bitangent) * inv_determinant - // == normalize(cross(y_vec, pos)) * geom_aspect.y * inv_determinant - // cobitangent = cross(tangent, normal) * inv_determinant - // == normalize(cross(x_vec, pos)) * geom_aspect.x * inv_determinant - // Simplified (scale by inv_determinant below): - cotangent_unscaled = normalize(cross(y_vec, pos)) * geom_aspect.y; - cobitangent_unscaled = normalize(cross(x_vec, pos)) * geom_aspect.x; - } - else if(geom_mode < 2.5) - { - // Sphere, alternate mapping: - // This mapping works a bit like the cylindrical mapping in two - // directions, which makes the lengths and directions more complex. - // Unfortunately, I can't find much of a shortcut: - const float3 tangent = normalize( - cross(y_vec, float3(pos.x, 0.0, pos.z))) * geom_aspect.x; - const float3 bitangent = normalize( - cross(x_vec, float3(0.0, pos.yz))) * geom_aspect.y; - cotangent_unscaled = cross(normal, bitangent); - cobitangent_unscaled = cross(tangent, normal); - } - else - { - // Cylinder: - // tangent = normalize(cross(y_vec, normal)) * geom_aspect.x; - // bitangent = float3(0.0, -geom_aspect.y, 0.0); - // inv_determinant = 1.0/length(cross(bitangent, tangent)) - // cotangent = cross(normal, bitangent) * inv_determinant - // == normalize(cross(y_vec, pos)) * geom_aspect.y * inv_determinant - // cobitangent = cross(tangent, normal) * inv_determinant - // == float3(0.0, -geom_aspect.x, 0.0) * inv_determinant - cotangent_unscaled = cross(y_vec, normal) * geom_aspect.y; - cobitangent_unscaled = float3(0.0, -geom_aspect.x, 0.0); - } - const float3 computed_normal = - cross(cobitangent_unscaled, cotangent_unscaled); - const float inv_determinant = rsqrt(dot(computed_normal, computed_normal)); - const 
float3 cotangent = cotangent_unscaled * inv_determinant; - const float3 cobitangent = cobitangent_unscaled * inv_determinant; - // The [cotangent, cobitangent, normal] column vecs form the cotangent - // frame, i.e. the inverse-transpose TBN matrix. Get its transpose: - const float3x3 object_to_tangent = float3x3(cotangent, cobitangent, normal); - return object_to_tangent; -} - -float2 get_curved_video_uv_coords_and_tangent_matrix( - const float2 flat_video_uv, const float3 eye_pos_local, - const float2 output_size_inv, const float2 geom_aspect, - const float geom_mode, const float3x3 global_to_local, - out float2x2 pixel_to_tangent_video_uv) -{ - // Requires: Parameters: - // 1.) flat_video_uv coords are in range [0.0, 1.0], where - // (0.0, 0.0) is the top-left corner of the screen and - // (1.0, 1.0) is the bottom-right corner. - // 2.) eye_pos_local is the 3D camera position in the simulated - // CRT's local coordinate frame. For best results, it must - // be computed based on the same geom_view_dist used here. - // 3.) output_size_inv = float2(1.0)/IN.output_size - // 4.) geom_aspect = get_aspect_vector( - // IN.output_size.x / IN.output_size.y); - // 5.) geom_mode is a static or runtime mode setting: - // 0 = off, 1 = sphere, 2 = sphere alt., 3 = cylinder - // 6.) global_to_local is a 3x3 matrix transforming (ordinary) - // worldspace vectors to the CRT's local coordinate frame - // Globals: - // 1.) geom_view_dist must be > 0.0. It controls the "near - // plane" used to interpret flat_video_uv as a view - // vector, which controls the field of view (FOV). - // Returns: Return final uv coords in [0.0, 1.0], and return a pixel- - // space to video_uv tangent-space matrix in the out parameter. - // (This matrix assumes pixel-space +y = down, like +v = down.) 
- // We'll transform flat_video_uv into a view vector, project - // the view vector from the camera/eye, intersect with a sphere - // or cylinder representing the simulated CRT, and convert the - // intersection position into final uv coords and a local - // transformation matrix. - // First get the 3D view vector (geom_aspect and geom_view_dist are globals): - // 1.) Center uv around (0.0, 0.0) and make (-0.5, -0.5) and (0.5, 0.5) - // correspond to the top-left/bottom-right output screen corners. - // 2.) Multiply by geom_aspect to preemptively "undo" Retroarch's screen- - // space 2D aspect correction. We'll reapply it in uv-space. - // 3.) (x, y) = (u, -v), because +v is down in 2D screenspace, but +y - // is up in 3D worldspace (enforce a right-handed system). - // 4.) The view vector z controls the "near plane" distance and FOV. - // For the effect of "looking through a window" at a CRT, it should be - // set equal to the user's distance from their physical screen, in - // units of the viewport's physical diagonal size. - const float2 view_uv = (flat_video_uv - float2(0.5, 0.5)) * geom_aspect; - const float3 view_vec_global = - float3(view_uv.x, -view_uv.y, -geom_view_dist); - // Transform the view vector into the CRT's local coordinate frame, convert - // to video_uv coords, and get the local 3D intersection position: - const float3 view_vec_local = mul(global_to_local, view_vec_global); - float3 pos; - const float2 centered_uv = view_vec_to_uv( - view_vec_local, eye_pos_local, geom_aspect, geom_mode, pos); - const float2 video_uv = centered_uv + float2(0.5, 0.5); - // Get a pixel-to-tangent-video-uv matrix. The caller could deal with - // all but one of these cases, but that would be more complicated. - #if _DRIVERS_ALLOW_DERIVATIVES - // Derivatives obtain a matrix very fast, but the direction of pixel- - // space +y seems to depend on the pass. Enforce the correct direction - // on a best-effort basis (but it shouldn't matter for antialiasing). 
- const float2 duv_dx = ddx(video_uv); - const float2 duv_dy = ddy(video_uv); - #ifdef LAST_PASS - pixel_to_tangent_video_uv = float2x2( - duv_dx.x, duv_dy.x, - -duv_dx.y, -duv_dy.y); - #else - pixel_to_tangent_video_uv = float2x2( - duv_dx.x, duv_dy.x, - duv_dx.y, duv_dy.y); - #endif - #else - // Manually define a transformation matrix. We'll assume pixel-space - // +y = down, just like +v = down. - if(geom_force_correct_tangent_matrix) - { - // Get the surface normal based on the local intersection position: - const float3 normal_base = geom_mode < 2.5 ? pos : - float3(pos.x, 0.0, pos.z); - const float3 normal = normalize(normal_base); - // Get pixel-to-object and object-to-tangent matrices and combine - // them into a 2x2 pixel-to-tangent matrix for video_uv offsets: - const float3x3 pixel_to_object = get_pixel_to_object_matrix( - global_to_local, eye_pos_local, view_vec_global, pos, normal, - output_size_inv); - const float3x3 object_to_tangent = get_object_to_tangent_matrix( - pos, normal, geom_aspect, geom_mode); - const float3x3 pixel_to_tangent3x3 = - mul(object_to_tangent, pixel_to_object); - pixel_to_tangent_video_uv = float2x2( - pixel_to_tangent3x3[0][0], pixel_to_tangent3x3[0][1], pixel_to_tangent3x3[1][0], pixel_to_tangent3x3[1][1]);//._m00_m01_m10_m11); - } - else - { - // Ignore curvature, and just consider flat scaling. The - // difference is only apparent with strong curvature: - pixel_to_tangent_video_uv = float2x2( - output_size_inv.x, 0.0, 0.0, output_size_inv.y); - } - #endif - return video_uv; -} - -float get_border_dim_factor(const float2 video_uv, const float2 geom_aspect) -{ - // COPYRIGHT NOTE FOR THIS FUNCTION: - // Copyright (C) 2010-2012 cgwg, 2014 TroggleMonkey - // This function uses an algorithm first coded in several of cgwg's GPL- - // licensed lines in crt-geom-curved.cg and its ancestors. 
The line - // between algorithm and code is nearly indistinguishable here, so it's - // unclear whether I could even release this project under a non-GPL - // license with this function included. - - // Calculate border_dim_factor from the proximity to uv-space image - // borders; geom_aspect/border_size/border/darkness/border_compress are globals: - const float2 edge_dists = min(video_uv, float2(1.0, 1.0) - video_uv) * - geom_aspect; - const float2 border_penetration = - max(float2(border_size, border_size) - edge_dists, float2(0.0, 0.0)); - const float penetration_ratio = border_size > 0 ? length(border_penetration)/border_size : 0; - const float border_escape_ratio = max(1.0 - penetration_ratio, 0.0); - const float border_dim_factor = - pow(border_escape_ratio, border_darkness) * max(1.0, border_compress); - return min(border_dim_factor, 1.0); -} - - - -#endif // _GEOMETRY_FUNCTIONS_H - - - diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/helper-functions-and-macros.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/helper-functions-and-macros.fxh deleted file mode 100644 index d9e1820df..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/helper-functions-and-macros.fxh +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _HELPER_FUNCTIONS_AND_MACROS_H -#define _HELPER_FUNCTIONS_AND_MACROS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be 
included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -float4 tex2D_nograd(sampler2D tex, float2 tex_coords) -{ - return tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0); -} - -// ReShade 4 does not permit the use of functions or the ternary operator -// outside of a function definition. This is a problem for this port -// because the original crt-royale shader makes heavy use of these -// constructs at the root level. - -// These preprocessor definitions are a workaround for this limitation. -// Note that they are strictly intended for defining complex global -// constants. I doubt they're more performant than the built-in -// equivalents, so I recommend using the built-ins whenever you can. 


// Arithmetic stand-ins for sign/abs/min/max/clamp/ceil/select that avoid
// function calls and the ternary operator. Every expansion is wrapped in
// parentheses so it can be embedded in a larger expression safely.

// Sign of c as an int: 1 for positive, -1 for negative, 0 for zero.
// The previous form evaluated to 0 for negative inputs, which made macro_abs
// (and anything built on it) collapse every negative value to 0.
#define macro_sign(c) (((int) ((c) > 0)) - ((int) ((c) < 0)))
// Absolute value of c, computed as c * sign(c).
#define macro_abs(c) ((c) * macro_sign(c))

// Exactly one of the two comparison terms is 1, so the sum selects c or d.
#define macro_min(c, d) ((c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d))))
#define macro_max(c, d) ((c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d))))
// Clamp c to [l, u]; relies on the parenthesized min/max above.
#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u)

// Truncate toward zero, then add 1 when truncation landed below c.
#define macro_ceil(c) ((float) ((int) (c) + (int) (((int) (c)) < (c))))

// Branch-free select: yields a when c is true and b otherwise.
#define macro_cond(c, a, b) (float(c) * (a) + float(!(c)) * (b))



//////////////////////// COMMON MATHEMATICAL CONSTANTS ///////////////////////

static const float pi = 3.141592653589;
// We often want to find the location of the previous texel, e.g.:
//     const float2 curr_texel = uv * texture_size;
//     const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
//     const float2 prev_texel_uv = prev_texel / texture_size;
// However, many GPU drivers round incorrectly around exact texel locations.
// We need to subtract a little less than 0.5 before flooring, and some GPU's
// require this value to be farther from 0.5 than others; define it here.
-// const float2 prev_texel = -// floor(curr_texel - float2(under_half)) + float2(0.5); -static const float under_half = 0.4995; - -// Avoid dividing by zero; using a macro overloads for float, float2, etc.: -#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625)) // 2^-16 - -// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0))) -#define fmod(x, y) (frac((x) / (y)) * (y)) - -#endif // _HELPER_FUNCTIONS_AND_MACROS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/phosphor-mask-calculations.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/phosphor-mask-calculations.fxh deleted file mode 100644 index 99b1d021c..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/phosphor-mask-calculations.fxh +++ /dev/null @@ -1,624 +0,0 @@ -#ifndef _PHOSHOR_MASK_CALCULATIONS_H -#define _PHOSHOR_MASK_CALCULATIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -/* - * Our goal is to use arithmetic to generate the phosphor mask. - * Phosphor masks are regular patterns, so we want something periodic. - * We need to avoid integer arithmetic because it tends to cause rounding errors. - * - * For all masks, we want to approximate a pulse wave in at least one dimension. This pulse wave - * will have narrow peaks, wide troughs, and constant periodicity. - * GRILLE will have a pulse wave along the x-axis and will be constant along the y-axis. - * SLOT and SHADOW will likely have a superposition of two out-of-phase pulse waves along each axis. - * For SHADOW, the width of the peaks will vary such that they generate ellipsoids on the screen. - * - * We can get a periodic function by starting with a triangle wave: T(t, f) = abs(1 - 2*frac(t * f)). - * This function gives us a triangle wave with f cycles in the domain [0, 1]. - * Note that T(0, f) = 1. - * - * Then we can compose this with a sigmoid curve to squish the triangle wave into a pulse wave. - * P(s, p, q) = exp(q s - q/2) / (exp(q s - q/2) + exp(-p)) - * s(t, f, o) = T(t*f - o, 1) - * - * f is the number of pulses to render along the given axis. - * o is the channel's horizontal offset along the given axis, normalized via the quotient raw_offset / raw_triad_width. - * p and q control how closely P resembles an ideal pulse wave and also how wide the peaks and troughs are. - * - * The interaction between p and q is rather complicated and difficult to describe, so they're not a good pair - * of parameters for users. But we have the info necessary to solve for p in terms of q. - * We know the width of a phosphor and the width of a triad, and we know the domain and range of P.
- * We can choose a coordinate (t0, y0) that will denote the edge of the phosphor. - * Note that y0 = P(t0, p, q) for some p and q. - * We let t0 = raw_phosphor_width / raw_triad_width, since we need to respect the shape of the phosphor. - * We let the user define P(t0). - * Technically, this means the user is defining the brightness of the phosphor's furthest edge. - * Visually, this looks like the user is defining the width of the phosphor. - * We'll call this the Phosphor Thickness. - * We let the user define q. - * Technically, this means the user is defining the squareness of the pulse wave. - * Visually, this looks like the user is defining the sharpness of the phosphor. - * We'll call this the Phosphor Sharpness. - * - * We can solve for p in terms of q very efficiently. - * p = ln(y0 / (1 - y0)) - q * (0.5 - 2 t0) - * - * Note that, if you work through the algebra, you get a factor of (t0 - 0.5). - * Using (0.5 - 2 t0) actually works better. It also matches up when you try plotting P and (t0, y0). - * - * For the GRILLE and SLOT masks, we can compute p once and recycle it. - * For the SHADOW mask, we can either compute p on each iteration or find a way to interpolate between min_p and max_p. - * - * One might expect it'd be way better to use a clamped triangle wave rather than a sigmoid or exponentiated cosine wave. - * As far as I can tell, this ends up being incorrect surprisingly enough. Although it's a good bit faster, - * it has terrible aliasing artifacts at small scales. The other implementations are slower, but they produce - * evenly-sized RGB phosphors for a variety of configurations even when the triad width is 3 pixels. At that - * scale, the triangle wave approach produces triads where one of the phosphors is thicker than the others. - * Taking into account the compute_mask_factor trick, the triangle wave approach would be a negligible - * performance improvement at the cost of a large drop in visual quality and user friendliness.
- */ - - -#include "bind-shader-params.fxh" -#include "scanline-functions.fxh" - -/* - * The GRILLE mask consists of an array of vertical stripes, so each channel will vary along the x-axis and will be constant - * along the y-axis. - * - * It has the following dimensions: - * Phosphors are 18 units wide with unbounded height. - * Phosphors in a triad are 2 units apart. - * Triads are 6 units apart. - * Triad centers are 64 units apart. - * The phosphors follow an RGB pattern. - * The left-most phosphor is red and offset by 3 units to the right. - */ -static const float grille_raw_phosphor_width = 18; -static const float grille_raw_phosphor_gap = 2; -static const float grille_raw_triad_horiz_gap = 6; -static const float grille_raw_triad_width = 3*grille_raw_phosphor_width + 2*grille_raw_phosphor_gap + grille_raw_triad_horiz_gap; - -static const float grille_raw_r_offset = (grille_raw_triad_horiz_gap + grille_raw_phosphor_width) / 2; -static const float grille_raw_g_offset = grille_raw_r_offset + grille_raw_phosphor_width + grille_raw_phosphor_gap; -static const float grille_raw_b_offset = grille_raw_g_offset + grille_raw_phosphor_width + grille_raw_phosphor_gap; -static const float3 grille_norm_center_offsets = float3( - grille_raw_r_offset, - grille_raw_g_offset, - grille_raw_b_offset -) / grille_raw_triad_width; - -static const float grille_edge_t = grille_raw_phosphor_width / 2; -static const float grille_edge_norm_t = grille_edge_t / grille_raw_triad_width; - - -/* - * The SLOT mask consists of an array of rectangles, so each channel will vary along both the x- and y-axes. - * - * It has the following dimensions: - * Phosphors are 18 units wide and 66 units tall. - * Phosphors in a triad are 2 units apart. - * Triads are 6 units apart horizontally and 6 units apart vertically. - * Triad centers are 64 units apart horizontally and 73 units apart vertically. - * The phosphors follow an RGB pattern. 
- * The upper-left-most phosphor is red and offset by 3 units to the right and 3 units down. - */ -static const float slot_raw_phosphor_width = 18; -static const float slot_raw_phosphor_gap = 2; -static const float slot_raw_triad_horiz_gap = 6; -static const float slot_raw_triad_width = 3*slot_raw_phosphor_width + 2*slot_raw_phosphor_gap + slot_raw_triad_horiz_gap; - -static const float slot_raw_phosphor_height = 66; -static const float slot_raw_triad_vert_gap = 6; -static const float slot_raw_triad_height = slot_raw_phosphor_height + slot_raw_triad_vert_gap; - -static const float slot_aspect_ratio = slot_raw_triad_height / slot_raw_triad_width; - -static const float slot_raw_r_offset_x = (slot_raw_triad_horiz_gap + slot_raw_phosphor_width) / 2; -static const float slot_raw_g_offset_x = slot_raw_r_offset_x + slot_raw_phosphor_width + slot_raw_phosphor_gap; -static const float slot_raw_b_offset_x = slot_raw_g_offset_x + slot_raw_phosphor_width + slot_raw_phosphor_gap; -static const float3 slot_norm_center_offsets_x = float3( - slot_raw_r_offset_x, - slot_raw_g_offset_x, - slot_raw_b_offset_x -) / slot_raw_triad_width; -static const float3 slot_norm_center_offsets_y = float3(0.5, 0.5, 0.5); - -static const float slot_edge_tx = slot_raw_phosphor_width / 2; -// We draw the slot mask as two sets of columns. To do that, we have to pretend the horizontal gap is the size of a whole triad. -// Then we need to halve the position of the phosphor edge. -static const float slot_edge_norm_tx = 0.5 * slot_edge_tx / slot_raw_triad_width; -static const float slot_edge_ty = slot_raw_phosphor_height / 2; -static const float slot_edge_norm_ty = slot_edge_ty / slot_raw_triad_height; - -/* - * The SHADOW mask consists of an array of circles, so each channel will vary along both the x- and y-axes. - * - * It has the following dimensions: - * Phosphors are 21 units in diameter. - * All phosphors are 0 units apart. 
- * Triad centers are 63 units apart horizontally and 21 units apart vertically. - * The phosphors follow a GBR pattern on odd rows and RBG on even rows. - * The upper-left-most phosphor is green and centered on the corner of the screen. - */ -static const float shadow_raw_phosphor_diam = 21; -static const float shadow_raw_phosphor_gap = 0; -static const float shadow_raw_triad_horiz_gap = 0; -static const float shadow_raw_triad_vert_gap = 0; - -static const float shadow_raw_triad_width = 3*shadow_raw_phosphor_diam + 2*shadow_raw_phosphor_gap + shadow_raw_triad_horiz_gap; -static const float shadow_raw_triad_height = shadow_raw_phosphor_diam + shadow_raw_triad_vert_gap; - -static const float shadow_aspect_ratio = shadow_raw_triad_height / shadow_raw_triad_width; - -static const float shadow_raw_g_offset_x = 0; -static const float shadow_raw_b_offset_x = shadow_raw_g_offset_x + shadow_raw_phosphor_diam + shadow_raw_phosphor_gap; -static const float shadow_raw_r_offset_x = shadow_raw_b_offset_x + shadow_raw_phosphor_diam + shadow_raw_phosphor_gap; -static const float3 shadow_norm_center_offsets_x = float3( - shadow_raw_r_offset_x, - shadow_raw_g_offset_x, - shadow_raw_b_offset_x -) / shadow_raw_triad_width; - -static const float3 shadow_norm_center_offsets_y = float3(0.0, 0.0, 0.0); - -static const float shadow_edge_tx = shadow_raw_phosphor_diam / 2; -static const float shadow_edge_norm_tx = shadow_edge_tx / shadow_raw_triad_width; -static const float shadow_edge_ty = shadow_raw_phosphor_diam / 2; -// We draw the shadow mask as two sets of rows. To do that, we have to pretend the vertical gap is the size of a whole triad. -// Then we need to halve the position of the phosphor edge. -static const float shadow_edge_norm_ty = 0.5 * shadow_edge_ty / shadow_raw_triad_height; -static const float shadow_norm_phosphor_rad = (shadow_raw_phosphor_diam/2) / shadow_raw_triad_width; - - -/* - * The SMALL GRILLE mask is composed of magenta and green stripes. 
- * Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html - * - * It has the following dimensions: - * Stripes are 32 units wide. - * Stripes in a triad are 0 units apart. - * Triads are 0 units apart horizontally. - * - * Each triad has two quads, side-by-side and aligned. - * Neighboring triads are offset vertically. - * Below is an array of 2 triads. - * x's denote magenta stripes, and o's denote green ones. - * - * xxooxxoo - * xxooxxoo - * xxooxxoo - * xxooxxoo - * xxooxxoo - * xxooxxoo - * - * The phosphors follow a MG pattern. - * The left-most phosphor is magenta and offset by 16 units to the right. - */ - -static const float smallgrille_raw_stripe_width = 32; -static const float smallgrille_raw_triad_width = 2*smallgrille_raw_stripe_width; - -static const float smallgrille_raw_r_offset_x = 0.5 * smallgrille_raw_stripe_width; -static const float smallgrille_raw_g_offset_x = smallgrille_raw_r_offset_x + smallgrille_raw_stripe_width; -static const float smallgrille_raw_b_offset_x = smallgrille_raw_r_offset_x; -static const float3 smallgrille_norm_center_offsets_x = float3( - smallgrille_raw_r_offset_x, - smallgrille_raw_g_offset_x, - smallgrille_raw_b_offset_x -) / smallgrille_raw_triad_width; - -static const float smallgrille_edge_t = 0.5 * smallgrille_raw_stripe_width; -static const float smallgrille_edge_norm_t = smallgrille_edge_t / smallgrille_raw_triad_width; - - -/* - * The SMALL SLOT mask is composed of magenta and green quads. - * Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html - * - * It has the following dimensions: - * Quads are 32 units wide and 48 units tall. - * Quads in a triad are 0 units apart. - * Triads are 0 units apart horizontally and 16 units apart vertically. - * - * Each triad has two quads, side-by-side and aligned. - * Neighboring triads are offset vertically. - * Below is a 2x2 matrix of 4 triads. - * x's denote magenta quads, and o's denote green ones. 
- * - * xxoo - * xxooxxoo - * xxooxxoo - * xxoo - * xxoo - * xxooxxoo - * xxooxxoo - * xxoo - * - * The phosphors follow a MG pattern. - * The upper-left-most phosphor is magenta and offset by 16 units to the right and 16 units down. - */ - -static const float smallslot_raw_quad_width = 32; -static const float smallslot_raw_triad_width = 2*smallslot_raw_quad_width; - -static const float smallslot_raw_quad_height = 1.5 * smallslot_raw_quad_width; -static const float smallslot_raw_triad_vert_gap = 0.5 * smallslot_raw_quad_width; -static const float smallslot_raw_triad_height = smallslot_raw_quad_height + smallslot_raw_triad_vert_gap; - -static const float smallslot_aspect_ratio = smallslot_raw_triad_height / smallslot_raw_triad_width; - -static const float smallslot_raw_r_offset_x = 0.5 * smallslot_raw_quad_width; -static const float smallslot_raw_g_offset_x = smallslot_raw_r_offset_x + smallslot_raw_quad_width; -static const float smallslot_raw_b_offset_x = smallslot_raw_r_offset_x; -static const float3 smallslot_norm_center_offsets_x = float3( - smallslot_raw_r_offset_x, - smallslot_raw_g_offset_x, - smallslot_raw_b_offset_x -) / smallslot_raw_triad_width; - -static const float3 smallslot_norm_center_offsets_y1 = 0.5 * smallslot_raw_quad_height / smallslot_raw_triad_height; -static const float3 smallslot_norm_center_offsets_y2 = smallslot_norm_center_offsets_y1 + smallslot_raw_triad_vert_gap / smallslot_raw_triad_height; - -static const float smallslot_edge_tx = 0.5 * smallslot_raw_quad_width; -// We draw the slot mask as two sets of columns. To do that, we have to pretend the horizontal gap is the size of a whole triad. -// Then we need to halve the position of the phosphor edge. 
-static const float smallslot_edge_norm_tx = 0.5 * smallslot_edge_tx / smallslot_raw_triad_width; -static const float smallslot_edge_ty = smallslot_raw_quad_height / 2; -static const float smallslot_edge_norm_ty = smallslot_edge_ty / smallslot_raw_triad_height; - -/* - * The SMALL SHADOW mask is composed of magenta and green quads. - * Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html - * - * It has the following dimensions: - * Quads are 17 units wide and 17 units tall. - * Quads in a triad are 0 units apart. - * Triads are 0 units apart horizontally and 0 units apart vertically. - * - * Each triad has two quads, side-by-side and aligned. - * Neighboring triads are offset vertically. - * Below is a 2x2 matrix of 4 triads. - * x's denote magenta quads, and o's denote green ones. - * - * xxooxxoo - * xxooxxoo - * ooxxooxx - * ooxxooxx - * - * The phosphors follow a MG pattern. - * The upper-left-most phosphor is magenta and offset by 16 units to the right and 16 units down. 
- */ - -static const float smallshadow_raw_quad_width = 17; -static const float smallshadow_raw_triad_width = 2 * smallshadow_raw_quad_width; - -static const float smallshadow_raw_quad_height = 17; -static const float smallshadow_raw_triad_height = smallshadow_raw_quad_height; - -static const float smallshadow_aspect_ratio = smallshadow_raw_triad_height / smallshadow_raw_triad_width; - -static const float smallshadow_raw_r_offset_x = 0.5 * smallshadow_raw_quad_width; -static const float smallshadow_raw_g_offset_x = smallshadow_raw_r_offset_x + smallshadow_raw_quad_width; -static const float smallshadow_raw_b_offset_x = smallshadow_raw_r_offset_x; -static const float3 smallshadow_norm_center_offsets_x = float3( - smallshadow_raw_r_offset_x, - smallshadow_raw_g_offset_x, - smallshadow_raw_b_offset_x -) / smallshadow_raw_triad_width; - -static const float3 smallshadow_norm_center_offsets_y = 0.5 * smallshadow_raw_triad_height; - -static const float smallshadow_edge_tx = 0.5 * smallshadow_raw_quad_width; -static const float smallshadow_edge_norm_tx = smallshadow_edge_tx / smallshadow_raw_triad_width; -static const float smallshadow_edge_ty = 0.5 * smallshadow_raw_quad_height; -// We draw the shadow mask as two sets of rows. To do that, we have to pretend the vertical gap is the size of a whole triad. -// Then we need to halve the position of the phosphor edge. 
static const float smallshadow_edge_norm_ty = 0.5 * smallshadow_edge_ty / smallshadow_raw_triad_height;


/*
 *  Returns the triad height-to-width ratio for the currently selected mask_type,
 *  scaled by the scale_triad_height parameter.
 *
 *  mask_type 0 and 3 use scale_triad_height unmodified; 1 multiplies by
 *  slot_aspect_ratio, 2 by shadow_aspect_ratio, 4 by smallslot_aspect_ratio,
 *  and every other value by smallshadow_aspect_ratio.
 *
 *  An if/else chain used to precede the switch. The switch assigns
 *  aspect_ratio in every case, so the chain's result was always overwritten
 *  (and it disagreed with the switch for mask types 4 and 5). It has been
 *  removed; the returned values are unchanged.
 */
float get_selected_aspect_ratio() {
    float aspect_ratio;
    [flatten]
    switch (mask_type) {
        case 0:
            aspect_ratio = scale_triad_height;
            break;
        case 1:
            aspect_ratio = scale_triad_height * slot_aspect_ratio;
            break;
        case 2:
            aspect_ratio = scale_triad_height * shadow_aspect_ratio;
            break;
        case 3:
            aspect_ratio = scale_triad_height;
            break;
        case 4:
            aspect_ratio = scale_triad_height * smallslot_aspect_ratio;
            break;
        default:
            aspect_ratio = scale_triad_height * smallshadow_aspect_ratio;
            break;
    }

    return aspect_ratio;
}

/*
 *  Returns the triad size as (width, width * aspect_ratio).
 *  When mask_size_param is 0 the width is mask_triad_width; otherwise it is
 *  content_size.x divided by mask_num_triads_across.
 */
float2 calc_triad_size() {
    const float aspect_ratio = get_selected_aspect_ratio();

    [branch]
    if (mask_size_param == 0) {
        return float2(1, aspect_ratio) * mask_triad_width;
    }
    else {
        float triad_width = content_size.x * rcp(mask_num_triads_across);
        return float2(1, aspect_ratio) * triad_width;
    }

}

/*
 *  Computes two candidate per-axis frequency factors: one derived from the
 *  triad size (content_size / triad size) and one derived from the requested
 *  number of triads across. For geom_rotation_mode values other than 0 and 2
 *  the triad terms are swizzled with .yx. mask_size_param selects which
 *  candidate is returned.
 */
float2 calc_phosphor_viewport_frequency_factor() {
    const float aspect_ratio = get_selected_aspect_ratio();

    float2 triad_size_factor;
    float2 num_triads_factor;
    [branch]
    if (geom_rotation_mode == 0 || geom_rotation_mode == 2) {
        triad_size_factor = content_size * rcp(mask_triad_width * float2(1, aspect_ratio));
        num_triads_factor = mask_num_triads_across * float2(1, content_size.y * rcp(content_size.x) * rcp(aspect_ratio));
    }
    else {
        triad_size_factor = content_size * rcp(mask_triad_width * float2(1, aspect_ratio)).yx;
        num_triads_factor = mask_num_triads_across * float2(1, content_size.y * rcp(content_size.x) * rcp(aspect_ratio)).yx;
    }

    return ((mask_size_param == 0) ?
triad_size_factor : num_triads_factor); -} - - -/* - * We have a pulse wave f(t0_norm, p, q) = y0 with unknown p. - * This function solves for p. - */ -#define calculate_phosphor_p_value(t0_norm, y0, q) (log((y0) * rcp(1 - (y0))) - (q) * (0.5 - 2*(t0_norm))) - -/* - * If we don't rescale the phosphor_thickness parameter, it has a logarithmic effect on the phosphor shape. - * Rescaling it makes it look closer to a linear effect. - */ -#define linearize_phosphor_thickness_param(p) (1 - exp(-(p))) - - -/* - * Generates a grille mask with the desired resolution and sharpness. - */ -float3 get_phosphor_intensity_grille( - const float2 texcoord, - const float2 viewport_frequency_factor, - const float2 grille_pq -) { - float3 center_offsets = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - grille_norm_center_offsets.bgr : grille_norm_center_offsets; - - center_offsets += phosphor_offset_x * 0.5; - - float3 theta = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets, 1); - float3 alpha = exp((theta - 0.5) * grille_pq.y); - return alpha * rcp(alpha + grille_pq.x); -} - - -/* - * Generates a slot mask with the desired resolution and sharpness. - */ -float3 get_phosphor_intensity_slot( - const float2 texcoord, - const float2 viewport_frequency_factor, - const float2 slot_pq_x, - const float2 slot_pq_y -) { - float3 center_offsets_x = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - slot_norm_center_offsets_x.bgr : slot_norm_center_offsets_x; - float3 center_offsets_y = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? 
- slot_norm_center_offsets_y.bgr : slot_norm_center_offsets_y; - - center_offsets_x += phosphor_offset_x * 0.5; - center_offsets_y += phosphor_offset_y * 0.5; - - float3 theta_x1 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x, 0.5); - float3 alpha_x1 = exp((theta_x1 - 0.5) * slot_pq_x.y); - alpha_x1 *= rcp(alpha_x1 + slot_pq_x.x); - - float3 theta_x2 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x + 1, 0.5); - float3 alpha_x2 = exp((theta_x2 - 0.5) * slot_pq_x.y); - alpha_x2 *= rcp(alpha_x2 + slot_pq_x.x); - - float3 theta_y1 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y, 1); - float3 alpha_y1 = exp((theta_y1 - 0.5) * slot_pq_y.y); - alpha_y1 *= rcp(alpha_y1 + slot_pq_y.x); - - float3 theta_y2 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y + 0.5, 1); - float3 alpha_y2 = exp((theta_y2 - 0.5) * slot_pq_y.y); - alpha_y2 *= rcp(alpha_y2 + slot_pq_y.x); - - return alpha_x1 * alpha_y1 + alpha_x2 * alpha_y2; -} - -/* - * Generates a shadow mask with the desired resolution and sharpness. - */ -float3 get_phosphor_intensity_shadow( - const float2 texcoord, - const float2 viewport_frequency_factor, - const float2 shadow_q -) { - float3 center_offsets_x = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - shadow_norm_center_offsets_x.bgr : shadow_norm_center_offsets_x; - float3 center_offsets_y = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? 
- shadow_norm_center_offsets_y.bgr : shadow_norm_center_offsets_y; - - center_offsets_x += phosphor_offset_x * 0.5; - center_offsets_y += phosphor_offset_y * 0.5; - - const float2 thickness_scaled = linearize_phosphor_thickness_param(phosphor_thickness); - - const float3 x_adj = texcoord.x * viewport_frequency_factor.x - center_offsets_x; - const float3 y_adj = texcoord.y * viewport_frequency_factor.y - center_offsets_y; - - const float3 texcoord_x_periodic1 = shadow_norm_phosphor_rad * triangle_wave(x_adj * 3 - 0.5, 1.0); - const float3 texcoord_x_periodic2 = shadow_norm_phosphor_rad * triangle_wave(x_adj * 3, 1.0); - const float3 ty1 = sqrt( - shadow_norm_phosphor_rad*shadow_norm_phosphor_rad - texcoord_x_periodic1*texcoord_x_periodic1 - ); - const float3 ty2 = sqrt( - shadow_norm_phosphor_rad*shadow_norm_phosphor_rad - texcoord_x_periodic2*texcoord_x_periodic2 - ); - - const float shadow_px = exp(-calculate_phosphor_p_value(shadow_edge_norm_tx, thickness_scaled.x, shadow_q.x)); - const float3 shadow_py1 = exp(-calculate_phosphor_p_value(ty1 * 0.5 * rcp(shadow_aspect_ratio), thickness_scaled.y, shadow_q.y)); - const float3 shadow_py2 = exp(-calculate_phosphor_p_value(ty2 * 0.5 * rcp(shadow_aspect_ratio), thickness_scaled.y, shadow_q.y)); - - float3 theta_x1 = triangle_wave(x_adj, 1); - float3 alpha_x1 = exp((theta_x1 - 0.5) * shadow_q.x); - alpha_x1 *= rcp(alpha_x1 + shadow_px); - - float3 theta_x2 = triangle_wave(x_adj + 0.5, 1); - float3 alpha_x2 = exp((theta_x2 - 0.5) * shadow_q.x); - alpha_x2 *= rcp(alpha_x2 + shadow_px); - - float3 theta_y1 = triangle_wave(y_adj, 0.5); - float3 alpha_y1 = exp((theta_y1 - 0.5) * shadow_q.y); - alpha_y1 *= rcp(alpha_y1 + shadow_py1); - - float3 theta_y2 = triangle_wave(y_adj + 1, 0.5); - float3 alpha_y2 = exp((theta_y2 - 0.5) * shadow_q.y); - alpha_y2 *= rcp(alpha_y2 + shadow_py2); - - return alpha_x1 * alpha_y1 + alpha_x2 * alpha_y2; -} - -float3 get_phosphor_intensity_grille_small( - const float2 texcoord, - const float2 
viewport_frequency_factor, - const float2 grille_pq_x -) { - float3 center_offsets_x = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - smallgrille_norm_center_offsets_x.grg : smallgrille_norm_center_offsets_x; - - center_offsets_x += phosphor_offset_x * 0.5; - - float3 theta = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x, 1); - float3 alpha = exp((theta - 0.5) * grille_pq_x.y); - alpha *= rcp(alpha + grille_pq_x.x); - - // Taking a sqrt here helps hide the gaps between the pixels when the triad size is small - return sqrt(alpha); -} - -float3 get_phosphor_intensity_slot_small( - const float2 texcoord, - const float2 viewport_frequency_factor, - const float2 slot_pq_x, - const float2 slot_pq_y -) { - float3 center_offsets_x = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - smallslot_norm_center_offsets_x.grg : smallslot_norm_center_offsets_x; - float3 center_offsets_y1 = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - smallslot_norm_center_offsets_y1.grg : smallslot_norm_center_offsets_y1; - float3 center_offsets_y2 = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? 
- smallslot_norm_center_offsets_y2.grg : smallslot_norm_center_offsets_y2; - - center_offsets_x += phosphor_offset_x * 0.5; - center_offsets_y1 += phosphor_offset_y * 0.5; - center_offsets_y2 += phosphor_offset_y * 0.5; - - float3 theta_x1 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x, 0.5); - float3 alpha_x1 = exp((theta_x1 - 0.5) * slot_pq_x.y); - alpha_x1 *= rcp(alpha_x1 + slot_pq_x.x); - - float3 theta_x2 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x + 1, 0.5); - float3 alpha_x2 = exp((theta_x2 - 0.5) * slot_pq_x.y); - alpha_x2 *= rcp(alpha_x2 + slot_pq_x.x); - - float3 theta_y1 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y1, 1); - float3 alpha_y1 = exp((theta_y1 - 0.5) * slot_pq_y.y); - alpha_y1 *= rcp(alpha_y1 + slot_pq_y.x); - - float3 theta_y2 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y2 + 0.5, 1); - float3 alpha_y2 = exp((theta_y2 - 0.5) * slot_pq_y.y); - alpha_y2 *= rcp(alpha_y2 + slot_pq_y.x); - - // Taking a sqrt here helps hide the gaps between the pixels when the triad size is small - return (alpha_x1 * alpha_y1 + alpha_x2 * alpha_y2); -} - -float3 get_phosphor_intensity_shadow_small( - const float2 texcoord, - const float2 viewport_frequency_factor, - const float2 shadow_pq_x, - const float2 shadow_pq_y -) { - float3 center_offsets_x = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? - smallshadow_norm_center_offsets_x.grg : smallshadow_norm_center_offsets_x; - float3 center_offsets_y = (geom_rotation_mode == 2 || geom_rotation_mode == 3) ? 
- smallshadow_norm_center_offsets_y.grg : smallshadow_norm_center_offsets_y; - - center_offsets_x += phosphor_offset_x * 0.5; - center_offsets_y += phosphor_offset_y * 0.5; - - float3 theta_x1 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x, 1); - float3 alpha_x1 = exp((theta_x1 - 0.5) * shadow_pq_x.y); - alpha_x1 *= rcp(alpha_x1 + shadow_pq_x.x); - - float3 theta_x2 = triangle_wave(texcoord.x * viewport_frequency_factor.x - center_offsets_x + 0.5, 1); - float3 alpha_x2 = exp((theta_x2 - 0.5) * shadow_pq_x.y); - alpha_x2 *= rcp(alpha_x2 + shadow_pq_x.x); - - float3 theta_y1 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y, 0.5); - float3 alpha_y1 = exp((theta_y1 - 0.5) * shadow_pq_y.y); - alpha_y1 *= rcp(alpha_y1 + shadow_pq_y.x); - - float3 theta_y2 = triangle_wave(texcoord.y * viewport_frequency_factor.y - center_offsets_y + 1, 0.5); - float3 alpha_y2 = exp((theta_y2 - 0.5) * shadow_pq_y.y); - alpha_y2 *= rcp(alpha_y2 + shadow_pq_y.x); - - // Taking a sqrt here helps hide the gaps between the pixels when the triad size is small - return sqrt(alpha_x1 * alpha_y1 + alpha_x2 * alpha_y2); -} - -#endif // _PHOSHOR_MASK_CALCULATIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/quad-pixel-communication.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/quad-pixel-communication.fxh deleted file mode 100644 index 8e44b4e3f..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/quad-pixel-communication.fxh +++ /dev/null @@ -1,243 +0,0 @@ - -#ifndef _QUAD_PIXEL_COMMUNICATION_H -#define _QUAD_PIXEL_COMMUNICATION_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey* -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, 
including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -///////////////////////////////// DISCLAIMER ///////////////////////////////// - -// *This code was inspired by "Shader Amortization using Pixel Quad Message -// Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2. My intent -// is not to plagiarize his fundamentally similar code and assert my own -// copyright, but the algorithmic helper functions require so little code that -// implementations can't vary by much except bugfixes and conventions. I just -// wanted to license my own particular code here to avoid ambiguity and make it -// clear that as far as I'm concerned, people can do as they please with it. - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// Given screen pixel numbers, derive a "quad vector" describing a fragment's -// position in its 2x2 pixel quad. Given that vector, obtain the values of any -// variable at neighboring fragments. -// Requires: Using this file in general requires: -// 1.) ddx() and ddy() are present in the current Cg profile. -// 2.) The GPU driver is using fine/high-quality derivatives. 
-// Functions will give incorrect results if this is not true, -// so a test function is included. - - -///////////////////// QUAD-PIXEL COMMUNICATION PRIMITIVES //////////////////// - -float4 get_quad_vector_naive(float4 output_pixel_num_wrt_uvxy) -{ - // Requires: Two measures of the current fragment's output pixel number - // in the range ([0, output_size.x), [0, output_size.y)): - // 1.) output_pixel_num_wrt_uvxy.xy increase with uv coords. - // 2.) output_pixel_num_wrt_uvxy.zw increase with screen xy. - // Returns: Two measures of the fragment's position in its 2x2 quad: - // 1.) The .xy components are its 2x2 placement with respect to - // uv direction (the origin (0, 0) is at the top-left): - // top-left = (-1.0, -1.0) top-right = ( 1.0, -1.0) - // bottom-left = (-1.0, 1.0) bottom-right = ( 1.0, 1.0) - // You need this to arrange/weight shared texture samples. - // 2.) The .zw components are its 2x2 placement with respect to - // screen xy direction (position); the origin varies. - // quad_gather needs this measure to work correctly. - // Note: quad_vector.zw = quad_vector.xy * float2( - // ddx(output_pixel_num_wrt_uvxy.x), - // ddy(output_pixel_num_wrt_uvxy.y)); - // Caveats: This function assumes the GPU driver always starts 2x2 pixel - // quads at even pixel numbers. This assumption can be wrong - // for odd output resolutions (nondeterministically so). - float4 pixel_odd = frac(output_pixel_num_wrt_uvxy * 0.5) * 2.0; - float4 quad_vector = pixel_odd * 2.0 - float4(1.0, 1.0, 1.0, 1.0); - return quad_vector; -} - -float4 get_quad_vector(float4 output_pixel_num_wrt_uvxy) -{ - // Requires: Same as get_quad_vector_naive() (see that first). - // Returns: Same as get_quad_vector_naive() (see that first), but it's - // correct even if the 2x2 pixel quad starts at an odd pixel, - // which can occur at odd resolutions. 
- float4 quad_vector_guess = - get_quad_vector_naive(output_pixel_num_wrt_uvxy); - // If quad_vector_guess.zw doesn't increase with screen xy, we know - // the 2x2 pixel quad starts at an odd pixel: - float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_guess.z), - ddy(quad_vector_guess.w)); - return quad_vector_guess * odd_start_mirror.xyxy; -} - -float4 get_quad_vector(float2 output_pixel_num_wrt_uv) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) output_pixel_num_wrt_uv must increase with uv coords and - // measure the current fragment's output pixel number in: - // ([0, output_size.x), [0, output_size.y)) - // Returns: Same as get_quad_vector_naive() (see that first), but it's - // correct even if the 2x2 pixel quad starts at an odd pixel, - // which can occur at odd resolutions. - // Caveats: This function requires less information than the version - // taking a float4, but it's potentially slower. - // Do screen coords increase with or against uv? Get the direction - // with respect to (uv.x, uv.y) for (screen.x, screen.y) in {-1, 1}. - float2 screen_uv_mirror = float2(ddx(output_pixel_num_wrt_uv.x), - ddy(output_pixel_num_wrt_uv.y)); - float2 pixel_odd_wrt_uv = frac(output_pixel_num_wrt_uv * 0.5) * 2.0; - float2 quad_vector_uv_guess = (pixel_odd_wrt_uv - float2(0.5, 0.5)) * 2.0; - float2 quad_vector_screen_guess = quad_vector_uv_guess * screen_uv_mirror; - // If quad_vector_screen_guess doesn't increase with screen xy, we know - // the 2x2 pixel quad starts at an odd pixel: - float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_screen_guess.x), - ddy(quad_vector_screen_guess.y)); - float4 quad_vector_guess = float4( - quad_vector_uv_guess, quad_vector_screen_guess); - return quad_vector_guess * odd_start_mirror.xyxy; -} - -void quad_gather(float4 quad_vector, float4 curr, - out float4 adjx, out float4 adjy, out float4 diag) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) 
The GPU driver is using fine/high-quality derivatives. - // 3.) quad_vector describes the current fragment's location in - // its 2x2 pixel quad using get_quad_vector()'s conventions. - // 4.) curr is any vector you wish to get neighboring values of. - // Returns: Values of an input vector (curr) at neighboring fragments - // adjacent x, adjacent y, and diagonal (via out parameters). - adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -void quad_gather(float4 quad_vector, float3 curr, - out float3 adjx, out float3 adjy, out float3 diag) -{ - // Float3 version - adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -void quad_gather(float4 quad_vector, float2 curr, - out float2 adjx, out float2 adjy, out float2 diag) -{ - // Float2 version - adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -float4 quad_gather(float4 quad_vector, float curr) -{ - // Float version: - // Returns: return.x == current - // return.y == adjacent x - // return.z == adjacent y - // return.w == diagonal - float4 all = float4(curr, curr, curr, curr); - all.y = all.x - ddx(all.x) * quad_vector.z; - all.zw = all.xy - ddy(all.xy) * quad_vector.w; - return all; -} - -float4 quad_gather_sum(float4 quad_vector, float4 curr) -{ - // Requires: Same as quad_gather() - // Returns: Sum of an input vector (curr) at all fragments in a quad. 
- float4 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float3 quad_gather_sum(float4 quad_vector, float3 curr) -{ - // Float3 version: - float3 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float2 quad_gather_sum(float4 quad_vector, float2 curr) -{ - // Float2 version: - float2 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float quad_gather_sum(float4 quad_vector, float curr) -{ - // Float version: - float4 all_values = quad_gather(quad_vector, curr); - return (all_values.x + all_values.y + all_values.z + all_values.w); -} - -bool fine_derivatives_working(float4 quad_vector, float4 curr) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) quad_vector describes the current fragment's location in - // its 2x2 pixel quad using get_quad_vector()'s conventions. - // 3.) curr must be a test vector with non-constant derivatives - // (its value should change nonlinearly across fragments). - // Returns: true if fine/hybrid/high-quality derivatives are used, or - // false if coarse derivatives are used or inconclusive - // Usage: Test whether quad-pixel communication is working! - // Method: We can confirm fine derivatives are used if the following - // holds (ever, for any value at any fragment): - // (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy)) - // The more values we test (e.g. test a float4 two ways), the - // easier it is to demonstrate fine derivatives are working. - // TODO: Check for floating point exact comparison issues! 
- float4 ddx_curr = ddx(curr); - float4 ddy_curr = ddy(curr); - float4 adjx = curr - ddx_curr * quad_vector.z; - float4 adjy = curr - ddy_curr * quad_vector.w; - bool ddy_different = any(bool4(ddy_curr.x != ddy(adjx).x, ddy_curr.y != ddy(adjx).y, ddy_curr.z != ddy(adjx).z, ddy_curr.w != ddy(adjx).w)); - bool ddx_different = any(bool4(ddx_curr.x != ddx(adjy).x, ddx_curr.y != ddx(adjy).y, ddx_curr.z != ddx(adjy).z, ddx_curr.w != ddx(adjy).w)); - return any(bool2(ddy_different, ddx_different)); -} - -bool fine_derivatives_working_fast(float4 quad_vector, float curr) -{ - // Requires: Same as fine_derivatives_working() - // Returns: Same as fine_derivatives_working() - // Usage: This is faster than fine_derivatives_working() but more - // likely to return false negatives, so it's less useful for - // offline testing/debugging. It's also useless as the basis - // for dynamic runtime branching as of May 2014: Derivatives - // (and quad-pixel communication) are currently disallowed in - // branches. However, future GPU's may allow you to use them - // in dynamic branches if you promise the branch condition - // evaluates the same for every fragment in the quad (and/or if - // the driver enforces that promise by making a single fragment - // control branch decisions). If that ever happens, this - // version may become a more economical choice. 
- float ddx_curr = ddx(curr); - float ddy_curr = ddy(curr); - float adjx = curr - ddx_curr * quad_vector.z; - return (ddy_curr != ddy(adjx)); -} - -#endif // _QUAD_PIXEL_COMMUNICATION_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/scanline-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/scanline-functions.fxh deleted file mode 100644 index 9f796c59c..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/scanline-functions.fxh +++ /dev/null @@ -1,501 +0,0 @@ -#ifndef _SCANLINE_FUNCTIONS_H -#define _SCANLINE_FUNCTIONS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -/////////////////////////////// BEGIN INCLUDES /////////////////////////////// - -#include "bind-shader-params.fxh" -#include "gamma-management.fxh" -#include "special-functions.fxh" - -//////////////////////////////// END INCLUDES //////////////////////////////// - -///////////////////////////// SCANLINE FUNCTIONS ///////////////////////////// - -float2 round_coord( - const float2 c, - const float2 starting_position, - const float2 bin_size -) { - const float2 adj_c = c - starting_position; - return c - fmod(adj_c, bin_size) + bin_size * 0.5; -} - - -// Use preproc defs for these, so they work for arbitrary choices of float1/2/3/4 -#define triangle_wave(t, f) abs(1 - 2*frac((t) * (f))) - -#define sawtooth_incr_wave(t, f) frac((t) * (f)) - -// using fmod(-t*f, 1.0) outputs 0 at t == 0, but I want it to output 1 -#define sawtooth_decr_wave(t, f) 1 - frac((t) * (f)) - - -struct InterpolationFieldData { - float triangle_wave_freq; - bool field_parity; - bool scanline_parity; - bool wrong_field; -}; - -InterpolationFieldData precalc_interpolation_field_data(float2 texcoord) { - InterpolationFieldData data; - - data.triangle_wave_freq = 2; - - const float field_wave = triangle_wave(texcoord.y + rcp(2*data.triangle_wave_freq), data.triangle_wave_freq * 0.5) * 2 - 1; - data.scanline_parity = field_wave >= 0; - - return data; -} - -InterpolationFieldData calc_interpolation_field_data(float2 texcoord, float scale) { - InterpolationFieldData data; - - data.triangle_wave_freq = scale * rcp(scanline_thickness); - // data.triangle_wave_freq = content_size.y * rcp(scanline_thickness); - - const bool frame_count_parity = (frame_count % 2 == 1) && (scanline_deinterlacing_mode != 1); - data.field_parity = (frame_count_parity && !interlace_back_field_first) || 
(!frame_count_parity && interlace_back_field_first); - - const float field_wave = triangle_wave(texcoord.y + rcp(2*data.triangle_wave_freq), data.triangle_wave_freq * 0.5) * 2 - 1; - data.scanline_parity = field_wave >= 0; - - const bool wrong_field_raw = (data.scanline_parity && !data.field_parity) || (!data.scanline_parity && data.field_parity); - data.wrong_field = enable_interlacing && wrong_field_raw; - - return data; -} - -float get_gaussian_sigma(const float color, const float sigma_range) -{ - // Requires: Globals: - // 1.) gaussian_beam_min_sigma and gaussian_beam_max_sigma are global floats - // containing the desired minimum and maximum beam standard - // deviations, for dim and bright colors respectively. - // 2.) gaussian_beam_max_sigma must be > 0.0 - // 3.) gaussian_beam_min_sigma must be in (0.0, gaussian_beam_max_sigma] - // 4.) gaussian_beam_spot_power must be defined as a global float. - // Parameters: - // 1.) color is the underlying source color along a scanline - // 2.) sigma_range = gaussian_beam_max_sigma - gaussian_beam_min_sigma; we take - // sigma_range as a parameter to avoid repeated computation - // when beam_{min, max}_sigma are runtime shader parameters - // Optional: Users may set beam_spot_shape_function to 1 to define the - // inner f(color) subfunction (see below) as: - // f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0)) - // Otherwise (technically, if beam_spot_shape_function < 0.5): - // f(color) = pow(color, gaussian_beam_spot_power) - // Returns: The standard deviation of the Gaussian beam for "color:" - // sigma = gaussian_beam_min_sigma + sigma_range * f(color) - // Details/Discussion: - // The beam's spot shape vaguely resembles an aspect-corrected f() in the - // range [0, 1] (not quite, but it's related). f(color) = color makes - // spots look like diamonds, and a spherical function or cube balances - // between variable width and a soft/realistic shape. 
A gaussian_beam_spot_power - // > 1.0 can produce an ugly spot shape and more initial clipping, but the - // final shape also differs based on the horizontal resampling filter and - // the phosphor bloom. For instance, resampling horizontally in nonlinear - // light and/or with a sharp (e.g. Lanczos) filter will sharpen the spot - // shape, but a sixth root is still quite soft. A power function (default - // 1.0/3.0 gaussian_beam_spot_power) is most flexible, but a fixed spherical curve - // has the highest variability without an awful spot shape. - // - // gaussian_beam_min_sigma affects scanline sharpness/aliasing in dim areas, and its - // difference from gaussian_beam_max_sigma affects beam width variability. It only - // affects clipping [for pure Gaussians] if gaussian_beam_spot_power > 1.0 (which is - // a conservative estimate for a more complex constraint). - // - // gaussian_beam_max_sigma affects clipping and increasing scanline width/softness - // as color increases. The wider this is, the more scanlines need to be - // evaluated to avoid distortion. For a pure Gaussian, the max_beam_sigma - // at which the first unused scanline always has a weight < 1.0/255.0 is: - // num scanlines = 2, max_beam_sigma = 0.2089; distortions begin ~0.34 - // num scanlines = 3, max_beam_sigma = 0.3879; distortions begin ~0.52 - // num scanlines = 4, max_beam_sigma = 0.5723; distortions begin ~0.70 - // num scanlines = 5, max_beam_sigma = 0.7591; distortions begin ~0.89 - // num scanlines = 6, max_beam_sigma = 0.9483; distortions begin ~1.08 - // Generalized Gaussians permit more leeway here as steepness increases. 
- if(beam_spot_shape_function < 0.5) - { - // Use a power function: - return gaussian_beam_min_sigma + sigma_range * pow(color, gaussian_beam_spot_power); - } - else - { - // Use a spherical function: - const float color_minus_1 = color - 1; - return gaussian_beam_min_sigma + sigma_range * sqrt(1.0 - color_minus_1*color_minus_1); - } -} - -float get_generalized_gaussian_beta(const float color, const float shape_range) -{ - // Requires: Globals: - // 1.) gaussian_beam_min_shape and gaussian_beam_max_shape are global floats - // containing the desired min/max generalized Gaussian - // beta parameters, for dim and bright colors respectively. - // 2.) gaussian_beam_max_shape must be >= 2.0 - // 3.) gaussian_beam_min_shape must be in [2.0, gaussian_beam_max_shape] - // 4.) gaussian_beam_shape_power must be defined as a global float. - // Parameters: - // 1.) color is the underlying source color along a scanline - // 2.) shape_range = gaussian_beam_max_shape - gaussian_beam_min_shape; we take - // shape_range as a parameter to avoid repeated computation - // when beam_{min, max}_shape are runtime shader parameters - // Returns: The type-I generalized Gaussian "shape" parameter beta for - // the given color. - // Details/Discussion: - // Beta affects the scanline distribution as follows: - // a.) beta < 2.0 narrows the peak to a spike with a discontinuous slope - // b.) beta == 2.0 just degenerates to a Gaussian - // c.) beta > 2.0 flattens and widens the peak, then drops off more steeply - // than a Gaussian. Whereas high sigmas widen and soften peaks, high - // beta widen and sharpen peaks at the risk of aliasing. - // Unlike high gaussian_beam_spot_powers, high gaussian_beam_shape_powers actually soften shape - // transitions, whereas lower ones sharpen them (at the risk of aliasing). 
- return gaussian_beam_min_shape + shape_range * pow(color, gaussian_beam_shape_power); -} - -float3 get_raw_interpolated_color(const float3 color0, - const float3 color1, const float3 color2, const float3 color3, - const float4 weights) -{ - // Use max to avoid bizarre artifacts from negative colors: - const float4x3 mtrx = float4x3(color0, color1, color2, color3); - const float3 m = mul(weights, mtrx); - return max(m, 0.0); -} - -float3 get_interpolated_linear_color(const float3 color0, const float3 color1, - const float3 color2, const float3 color3, const float4 weights) -{ - // Requires: 1.) Requirements of include/gamma-management.h must be met: - // intermediate_gamma must be globally defined, and input - // colors are interpreted as linear RGB unless you #define - // GAMMA_ENCODE_EVERY_FBO (in which case they are - // interpreted as gamma-encoded with intermediate_gamma). - // 2.) color0-3 are colors sampled from a texture with tex2D(). - // They are interpreted as defined in requirement 1. - // 3.) weights contains weights for each color, summing to 1.0. - // 4.) beam_horiz_linear_rgb_weight must be defined as a global - // float in [0.0, 1.0] describing how much blending should - // be done in linear RGB (rest is gamma-corrected RGB). - // 5.) _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined - // if beam_horiz_linear_rgb_weight is anything other than a - // static constant, or we may try branching at runtime - // without dynamic branches allowed (slow). - // Returns: Return an interpolated color lookup between the four input - // colors based on the weights in weights. The final color will - // be a linear RGB value, but the blending will be done as - // indicated above. 
- const float intermediate_gamma = get_intermediate_gamma(); - const float inv_intermediate_gamma = 1.0 / intermediate_gamma; - // Branch if beam_horiz_linear_rgb_weight is static (for free) or if the - // profile allows dynamic branches (faster than computing extra pows): - #if !_RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - #else - #if _DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - #endif - #endif - #ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - // beam_horiz_linear_rgb_weight is static, so we can branch: - #ifdef GAMMA_ENCODE_EVERY_FBO - const float3 gamma_mixed_color = pow( - get_raw_interpolated_color(color0, color1, color2, color3, weights), - intermediate_gamma); - if(beam_horiz_linear_rgb_weight > 0.0) - { - const float3 linear_mixed_color = get_raw_interpolated_color( - pow(color0, intermediate_gamma), - pow(color1, intermediate_gamma), - pow(color2, intermediate_gamma), - pow(color3, intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight); - } - else - { - return gamma_mixed_color; - } - #else - const float3 linear_mixed_color = get_raw_interpolated_color( - color0, color1, color2, color3, weights); - if(beam_horiz_linear_rgb_weight < 1.0) - { - const float3 gamma_mixed_color = get_raw_interpolated_color( - pow(color0, inv_intermediate_gamma), - pow(color1, inv_intermediate_gamma), - pow(color2, inv_intermediate_gamma), - pow(color3, inv_intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight); - } - else - { - return linear_mixed_color; - } - #endif // GAMMA_ENCODE_EVERY_FBO - #else - #ifdef GAMMA_ENCODE_EVERY_FBO - // Inputs: color0-3 are colors in gamma-encoded RGB. 
- const float3 gamma_mixed_color = pow(get_raw_interpolated_color( - color0, color1, color2, color3, weights), intermediate_gamma); - const float3 linear_mixed_color = get_raw_interpolated_color( - pow(color0, intermediate_gamma), - pow(color1, intermediate_gamma), - pow(color2, intermediate_gamma), - pow(color3, intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight); - #else - // Inputs: color0-3 are colors in linear RGB. - const float3 linear_mixed_color = get_raw_interpolated_color( - color0, color1, color2, color3, weights); - const float3 gamma_mixed_color = get_raw_interpolated_color( - pow(color0, inv_intermediate_gamma), - pow(color1, inv_intermediate_gamma), - pow(color2, inv_intermediate_gamma), - pow(color3, inv_intermediate_gamma), - weights); - // wtf fixme -// const float beam_horiz_linear_rgb_weight1 = 1.0; - return lerp(gamma_mixed_color, linear_mixed_color, - beam_horiz_linear_rgb_weight); - #endif // GAMMA_ENCODE_EVERY_FBO - #endif // SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT -} - -float3 get_scanline_color(const sampler2D tex, const float2 scanline_uv, - const float2 uv_step_x, const float4 weights) -{ - // Requires: 1.) scanline_uv must be vertically snapped to the caller's - // desired line or scanline and horizontally snapped to the - // texel just left of the output pixel (color1) - // 2.) uv_step_x must contain the horizontal uv distance - // between texels. - // 3.) weights must contain interpolation filter weights for - // color0, color1, color2, and color3, where color1 is just - // left of the output pixel. - // Returns: Return a horizontally interpolated texture lookup using 2-4 - // nearby texels, according to weights and the conventions of - // get_interpolated_linear_color(). - // We can ignore the outside texture lookups for Quilez resampling. 
- const float3 color1 = tex2D_linearize(tex, scanline_uv, get_input_gamma()).rgb; - const float3 color2 = tex2D_linearize(tex, scanline_uv + uv_step_x, get_input_gamma()).rgb; - float3 color0 = float3(0.0, 0.0, 0.0); - float3 color3 = float3(0.0, 0.0, 0.0); - if(beam_horiz_filter > 0.5) - { - color0 = tex2D_linearize(tex, scanline_uv - uv_step_x, get_input_gamma()).rgb; - color3 = tex2D_linearize(tex, scanline_uv + 2.0 * uv_step_x, get_input_gamma()).rgb; - } - // Sample the texture as-is, whether it's linear or gamma-encoded: - // get_interpolated_linear_color() will handle the difference. - return get_interpolated_linear_color(color0, color1, color2, color3, weights); -} - -float3 sample_single_scanline_horizontal(const sampler2D tex, - const float2 tex_uv, const float2 tex_size, - const float2 texture_size_inv) -{ - // TODO: Add function requirements. - // Snap to the previous texel and get sample dists from 2/4 nearby texels: - const float2 curr_texel = tex_uv * tex_size; - // Use under_half to fix a rounding bug right around exact texel locations. 
- const float2 prev_texel = floor(curr_texel - under_half) + 0.5; - const float2 prev_texel_hor = float2(prev_texel.x, curr_texel.y); - const float2 prev_texel_hor_uv = prev_texel_hor * texture_size_inv; - const float prev_dist = curr_texel.x - prev_texel_hor.x; - const float4 sample_dists = float4(1.0 + prev_dist, prev_dist, - 1.0 - prev_dist, 2.0 - prev_dist); - // Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels: - float4 weights; - if (beam_horiz_filter < 0.5) { - // None: - weights = float4(0, 1, 0, 0); - } - else if(beam_horiz_filter < 1.5) - { - // Quilez: - const float x = sample_dists.y; - const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0); - weights = float4(0.0, 1.0 - w2, w2, 0.0); - } - else if(beam_horiz_filter < 2.5) - { - // Gaussian: - float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma); - weights = exp(-(sample_dists*sample_dists)*inner_denom_inv); - } - else - { - // Lanczos2: - const float4 pi_dists = FIX_ZERO(sample_dists * pi); - weights = 2.0 * sin(pi_dists) * sin(pi_dists * 0.5) / - (pi_dists * pi_dists); - } - // Ensure the weight sum == 1.0: - const float4 final_weights = weights/dot(weights, float4(1.0, 1.0, 1.0, 1.0)); - // Get the interpolated horizontal scanline color: - const float2 uv_step_x = float2(texture_size_inv.x, 0.0); - return get_scanline_color( - tex, prev_texel_hor_uv, uv_step_x, final_weights); -} - -float3 sample_rgb_scanline( - const sampler2D tex, - const float2 tex_uv, const float2 tex_size, - const float2 texture_size_inv -) { - if (beam_misconvergence) { - const float3 convergence_offsets_rgb_x = get_convergence_offsets_x_vector(); - const float3 convergence_offsets_rgb_y = get_convergence_offsets_y_vector(); - - const float3 offset_u_rgb = convergence_offsets_rgb_x * texture_size_inv.x; - const float3 offset_v_rgb = convergence_offsets_rgb_y * texture_size_inv.y; - - const float2 scanline_uv_r = tex_uv - float2(offset_u_rgb.r, offset_v_rgb.r); - const float2 scanline_uv_g = tex_uv 
- float2(offset_u_rgb.g, offset_v_rgb.g); - const float2 scanline_uv_b = tex_uv - float2(offset_u_rgb.b, offset_v_rgb.b); - - /**/ - const float4 sample_r = tex2D(tex, scanline_uv_r); - const float4 sample_g = tex2D(tex, scanline_uv_g); - const float4 sample_b = tex2D(tex, scanline_uv_b); - /**/ - - /* - const float3 sample_r = sample_single_scanline_horizontal( - tex, scanline_uv_r, tex_size, texture_size_inv); - const float3 sample_g = sample_single_scanline_horizontal( - tex, scanline_uv_g, tex_size, texture_size_inv); - const float3 sample_b = sample_single_scanline_horizontal( - tex, scanline_uv_b, tex_size, texture_size_inv); - */ - - return float3(sample_r.r, sample_g.g, sample_b.b); - } - else { - // return tex2D(tex, tex_uv).rgb; - return sample_single_scanline_horizontal(tex, tex_uv, tex_size, texture_size_inv); - } -} - -float3 sample_rgb_scanline_horizontal(const sampler2D tex, - const float2 tex_uv, const float2 tex_size, - const float2 texture_size_inv) -{ - // TODO: Add function requirements. - // Rely on a helper to make convergence easier. 
- if(beam_misconvergence) - { - const float3 convergence_offsets_rgb = get_convergence_offsets_x_vector(); - const float3 offset_u_rgb = convergence_offsets_rgb * texture_size_inv.xxx; - const float2 scanline_uv_r = tex_uv - float2(offset_u_rgb.r, 0.0); - const float2 scanline_uv_g = tex_uv - float2(offset_u_rgb.g, 0.0); - const float2 scanline_uv_b = tex_uv - float2(offset_u_rgb.b, 0.0); - const float3 sample_r = sample_single_scanline_horizontal( - tex, scanline_uv_r, tex_size, texture_size_inv); - const float3 sample_g = sample_single_scanline_horizontal( - tex, scanline_uv_g, tex_size, texture_size_inv); - const float3 sample_b = sample_single_scanline_horizontal( - tex, scanline_uv_b, tex_size, texture_size_inv); - return float3(sample_r.r, sample_g.g, sample_b.b); - } - else - { - return sample_single_scanline_horizontal(tex, tex_uv, tex_size, texture_size_inv); - } -} - -float3 get_averaged_scanline_sample( - sampler2D tex, const float2 texcoord, - const float scanline_start_y, const float v_step_y, - const float input_gamma -) { - // Sample `scanline_thickness` vertically-contiguous pixels and average them. - float3 interpolated_line = 0.0; - for (int i = 0; i < scanline_thickness; i++) { - float4 coord = float4(texcoord.x, scanline_start_y + i * v_step_y, 0, 0); - interpolated_line += tex2Dlod_linearize(tex, coord, input_gamma).rgb; - } - interpolated_line /= float(scanline_thickness); - - return interpolated_line; -} - -float get_beam_strength(float dist, float color, - const float sigma_range, const float shape_range) -{ - // entry point in original is scanline_contrib() - // this is based on scanline_gaussian_sampled_contrib() from original - - // See scanline_gaussian_integral_contrib() for detailed comments! 
- // gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2)) - const float sigma = get_gaussian_sigma(color, sigma_range); - // Avoid repeated divides: - const float sigma_inv = 1.0 / sigma; - const float inner_denom_inv = 0.5 * sigma_inv * sigma_inv; - const float outer_denom_inv = sigma_inv/sqrt(2.0*pi); - - return color*exp(-(dist*dist)*inner_denom_inv)*outer_denom_inv; -} - -float get_gaussian_beam_strength( - float dist, - float color, - const float sigma_range, - const float shape_range -) { - // entry point in original is scanline_contrib() - // this is based on scanline_generalized_gaussian_sampled_contrib() from original - - // See scanline_generalized_gaussian_integral_contrib() for details! - // generalized sample = - // beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta) - const float alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range); - const float beta = get_generalized_gaussian_beta(color, shape_range); - // Avoid repeated divides: - const float alpha_inv = 1.0 / alpha; - const float beta_inv = 1.0 / beta; - const float scale = color * beta * 0.5 * alpha_inv / gamma_impl(beta_inv, beta); - - return scale * exp(-pow(abs(dist*alpha_inv), beta)); -} - -float get_linear_beam_strength( - const float dist, - const float color, - const float num_pixels, - const bool interlaced -) { - const float p = color * (1 - abs(dist)); - return clamp(p, 0, color); -} - - -#endif // _SCANLINE_FUNCTIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/special-functions.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/special-functions.fxh deleted file mode 100644 index 1808223a7..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/special-functions.fxh +++ /dev/null @@ -1,504 +0,0 @@ -#ifndef _SPECIAL_FUNCTIONS_H -#define _SPECIAL_FUNCTIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// 
Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file implements the following mathematical special functions: -// 1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2)) -// 2.) gamma(s), a real-numbered extension of the integer factorial function -// It also implements normalized_ligamma(s, z), a normalized lower incomplete -// gamma function for s < 0.5 only. Both gamma() and normalized_ligamma() can -// be called with an _impl suffix to use an implementation version with a few -// extra precomputed parameters (which may be useful for the caller to reuse). -// See below for details. -// -// Design Rationale: -// Pretty much every line of code in this file is duplicated four times for -// different input types (float4/float3/float2/float). This is unfortunate, -// but Cg doesn't allow function templates. 
Macros would be far less verbose, -// but they would make the code harder to document and read. I don't expect -// these functions will require a whole lot of maintenance changes unless -// someone ever has need for more robust incomplete gamma functions, so code -// duplication seems to be the lesser evil in this case. - - -/////////////////////////// GAUSSIAN ERROR FUNCTION ////////////////////////// - -float4 erf6(float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Return an Abramowitz/Stegun approximation of erf(), where: - // erf(x) = 2/sqrt(pi) * integral(e**(-x**2)) - // This approximation has a max absolute error of 2.5*10**-5 - // with solid numerical robustness and efficiency. See: - // https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions - const float4 sign_x = sign(x); - const float4 t = 1.0/(1.0 + 0.47047*abs(x)); - const float4 result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float3 erf6(const float3 x) -{ - // Float3 version: - const float3 sign_x = sign(x); - const float3 t = 1.0/(1.0 + 0.47047*abs(x)); - const float3 result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float2 erf6(const float2 x) -{ - // Float2 version: - const float2 sign_x = sign(x); - const float2 t = 1.0/(1.0 + 0.47047*abs(x)); - const float2 result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float erf6(const float x) -{ - // Float version: - const float sign_x = sign(x); - const float t = 1.0/(1.0 + 0.47047*abs(x)); - const float result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float4 erft(const float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Approximate erf() with the hyperbolic tangent. 
The error is - // visually noticeable, but it's blazing fast and perceptually - // close...at least on ATI hardware. See: - // http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html - // Warning: Only use this if your hardware drivers correctly implement - // tanh(): My nVidia 8800GTS returns garbage output. - return tanh(1.202760580 * x); -} - -float3 erft(const float3 x) -{ - // Float3 version: - return tanh(1.202760580 * x); -} - -float2 erft(const float2 x) -{ - // Float2 version: - return tanh(1.202760580 * x); -} - -float erft(const float x) -{ - // Float version: - return tanh(1.202760580 * x); -} - -float4 erf(const float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Some approximation of erf(x), depending on user settings. - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float3 erf(const float3 x) -{ - // Float3 version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float2 erf(const float2 x) -{ - // Float2 version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float erf(const float x) -{ - // Float version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - - -/////////////////////////// COMPLETE GAMMA FUNCTION ////////////////////////// - -float4 gamma_impl(const float4 s, const float4 s_inv) -{ - // Requires: 1.) s is the standard parameter to the gamma function, and - // it should lie in the [0, 36] range. - // 2.) s_inv = 1.0/s. This implementation function requires - // the caller to precompute this value, giving users the - // opportunity to reuse it. 
- // Returns: Return approximate gamma function (real-numbered factorial) - // output using the Lanczos approximation with two coefficients - // calculated using Paul Godfrey's method here: - // http://my.fit.edu/~gabdo/gamma.txt - // An optimal g value for s in [0, 36] is ~1.12906830989, with - // a maximum relative error of 0.000463 for 2**16 equally - // evals. We could use three coeffs (0.0000346 error) without - // hurting latency, but this allows more parallelism with - // outside instructions. - static const float g = 1.12906830989; - static const float c0 = 0.8109119309638332633713423362694399653724431; - static const float c1 = 0.4808354605142681877121661197951496120000040; - static const float e = 2.71828182845904523536028747135266249775724709; - const float4 sph = s + 0.5; - const float4 lanczos_sum = c0 + c1/(s + 1.0); - const float4 base = (sph + g)/e; // or (s + g + float4(0.5))/e - // gamma(s + 1) = base**sph * lanczos_sum; divide by s for gamma(s). - // This has less error for small s's than (s -= 1.0) at the beginning. 
- return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float3 gamma_impl(const float3 s, const float3 s_inv) -{ - // Float3 version: - static const float g = 1.12906830989; - static const float c0 = 0.8109119309638332633713423362694399653724431; - static const float c1 = 0.4808354605142681877121661197951496120000040; - static const float e = 2.71828182845904523536028747135266249775724709; - const float3 sph = s + 0.5; - const float3 lanczos_sum = c0 + c1/(s + 1.0); - const float3 base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float2 gamma_impl(const float2 s, const float2 s_inv) -{ - // Float2 version: - static const float g = 1.12906830989; - static const float c0 = 0.8109119309638332633713423362694399653724431; - static const float c1 = 0.4808354605142681877121661197951496120000040; - static const float e = 2.71828182845904523536028747135266249775724709; - const float2 sph = s + 0.5; - const float2 lanczos_sum = c0 + c1/(s + 1.0); - const float2 base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float gamma_impl(const float s, const float s_inv) -{ - // Float version: - static const float g = 1.12906830989; - static const float c0 = 0.8109119309638332633713423362694399653724431; - static const float c1 = 0.4808354605142681877121661197951496120000040; - static const float e = 2.71828182845904523536028747135266249775724709; - const float sph = s + 0.5; - const float lanczos_sum = c0 + c1/(s + 1.0); - const float base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float4 gamma(const float4 s) -{ - // Requires: s is the standard parameter to the gamma function, and it - // should lie in the [0, 36] range. - // Returns: Return approximate gamma function output with a maximum - // relative error of 0.000463. See gamma_impl for details. 
- return gamma_impl(s, 1.0/s); -} - -float3 gamma(const float3 s) -{ - // Float3 version: - return gamma_impl(s, 1.0/s); -} - -float2 gamma(const float2 s) -{ - // Float2 version: - return gamma_impl(s, 1.0/s); -} - -float gamma(const float s) -{ - // Float version: - return gamma_impl(s, 1.0/s); -} - - -//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) /////////////// - -// Lower incomplete gamma function for small s and z (implementation): -float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv) -{ - // Requires: 1.) s < ~0.5 - // 2.) z <= ~0.775075 - // 3.) s_inv = 1.0/s (precomputed for outside reuse) - // Returns: A series representation for the lower incomplete gamma - // function for small s and small z (4 terms). - // The actual "rolled up" summation looks like: - // last_sign = 1.0; last_pow = 1.0; last_factorial = 1.0; - // sum = last_sign * last_pow / ((s + k) * last_factorial) - // for(int i = 0; i < 4; ++i) - // { - // last_sign *= -1.0; last_pow *= z; last_factorial *= i; - // sum += last_sign * last_pow / ((s + k) * last_factorial); - // } - // Unrolled, constant-unfolded and arranged for madds and parallelism: - const float4 scale = pow(z, s); - float4 sum = s_inv; // Summation iteration 0 result - // Summation iterations 1, 2, and 3: - const float4 z_sq = z*z; - const float4 denom1 = s + 1.0; - const float4 denom2 = 2.0*s + 4.0; - const float4 denom3 = 6.0*s + 18.0; - //float4 denom4 = 24.0*s + float4(96.0); - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - //sum += z_sq * z_sq / denom4; - // Scale and return: - return scale * sum; -} - -float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv) -{ - // Float3 version: - const float3 scale = pow(z, s); - float3 sum = s_inv; - const float3 z_sq = z*z; - const float3 denom1 = s + 1.0; - const float3 denom2 = 2.0*s + 4.0; - const float3 denom3 = 6.0*s + 18.0; - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; 
- return scale * sum; -} - -float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv) -{ - // Float2 version: - const float2 scale = pow(z, s); - float2 sum = s_inv; - const float2 z_sq = z*z; - const float2 denom1 = s + 1.0; - const float2 denom2 = 2.0*s + 4.0; - const float2 denom3 = 6.0*s + 18.0; - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - return scale * sum; -} - -float ligamma_small_z_impl(const float s, const float z, const float s_inv) -{ - // Float version: - const float scale = pow(z, s); - float sum = s_inv; - const float z_sq = z*z; - const float denom1 = s + 1.0; - const float denom2 = 2.0*s + 4.0; - const float denom3 = 6.0*s + 18.0; - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - return scale * sum; -} - -// Upper incomplete gamma function for small s and large z (implementation): -float4 uigamma_large_z_impl(const float4 s, const float4 z) -{ - // Requires: 1.) s < ~0.5 - // 2.) z > ~0.775075 - // Returns: Gauss's continued fraction representation for the upper - // incomplete gamma function (4 terms). - // The "rolled up" continued fraction looks like this. 
The denominator - // is truncated, and it's calculated "from the bottom up:" - // denom = float4('inf'); - // float4 one = float4(1.0); - // for(int i = 4; i > 0; --i) - // { - // denom = ((i * 2.0) - one) + z - s + (i * (s - i))/denom; - // } - // Unrolled and constant-unfolded for madds and parallelism: - const float4 numerator = pow(z, s) * exp(-z); - float4 denom = 7.0 + z - s; - denom = 5.0 + z - s + (3.0*s - 9.0)/denom; - denom = 3.0 + z - s + (2.0*s - 4.0)/denom; - denom = 1.0 + z - s + (s - 1.0)/denom; - return numerator / denom; -} - -float3 uigamma_large_z_impl(const float3 s, const float3 z) -{ - // Float3 version: - const float3 numerator = pow(z, s) * exp(-z); - float3 denom = 7.0 + z - s; - denom = 5.0 + z - s + (3.0*s - 9.0)/denom; - denom = 3.0 + z - s + (2.0*s - 4.0)/denom; - denom = 1.0 + z - s + (s - 1.0)/denom; - return numerator / denom; -} - -float2 uigamma_large_z_impl(const float2 s, const float2 z) -{ - // Float2 version: - const float2 numerator = pow(z, s) * exp(-z); - float2 denom = 7.0 + z - s; - denom = 5.0 + z - s + (3.0*s - 9.0)/denom; - denom = 3.0 + z - s + (2.0*s - 4.0)/denom; - denom = 1.0 + z - s + (s - 1.0)/denom; - return numerator / denom; -} - -float uigamma_large_z_impl(const float s, const float z) -{ - // Float version: - const float numerator = pow(z, s) * exp(-z); - float denom = 7.0 + z - s; - denom = 5.0 + z - s + (3.0*s - 9.0)/denom; - denom = 3.0 + z - s + (2.0*s - 4.0)/denom; - denom = 1.0 + z - s + (s - 1.0)/denom; - return numerator / denom; -} - -// Normalized lower incomplete gamma function for small s (implementation): -float4 normalized_ligamma_impl(const float4 s, const float4 z, - const float4 s_inv, const float4 gamma_s_inv) -{ - // Requires: 1.) s < ~0.5 - // 2.) s_inv = 1/s (precomputed for outside reuse) - // 3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse) - // Returns: Approximate the normalized lower incomplete gamma function - // for s < 0.5. 
Since we only care about s < 0.5, we only need - // to evaluate two branches (not four) based on z. Each branch - // uses four terms, with a max relative error of ~0.00182. The - // branch threshold and specifics were adapted for fewer terms - // from Gil/Segura/Temme's paper here: - // http://oai.cwi.nl/oai/asset/20433/20433B.pdf - // Evaluate both branches: Real branches test slower even when available. - static const float thresh = 0.775075; - int4 z_is_large; - z_is_large.x = int(z.x > thresh); - z_is_large.y = int(z.y > thresh); - z_is_large.z = int(z.z > thresh); - z_is_large.w = int(z.w > thresh); - const float4 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - // Combine the results from both branches: - int4 inverse_z_is_large = saturate(~(z_is_large)); - return large_z * float4(z_is_large) + small_z * float4(inverse_z_is_large); -} - -float3 normalized_ligamma_impl(const float3 s, const float3 z, - const float3 s_inv, const float3 gamma_s_inv) -{ - // Float3 version: - static const float thresh = 0.775075; - int3 z_is_large; - z_is_large.x = int(z.x > thresh); - z_is_large.y = int(z.y > thresh); - z_is_large.z = int(z.z > thresh); - const float3 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - int3 inverse_z_is_large = saturate(~(z_is_large)); - return large_z * float3(z_is_large) + small_z * float3(inverse_z_is_large); -} - -float2 normalized_ligamma_impl(const float2 s, const float2 z, - const float2 s_inv, const float2 gamma_s_inv) -{ - // Float2 version: - static const float thresh = 0.775075; - int2 z_is_large; - z_is_large.x = int(z.x > thresh); - z_is_large.y = int(z.y > thresh); - const float2 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - int2 inverse_z_is_large = saturate(~(z_is_large)); - 
return large_z * float2(z_is_large) + small_z * float2(inverse_z_is_large); -} - -float normalized_ligamma_impl(const float s, const float z, - const float s_inv, const float gamma_s_inv) -{ - // Float version: - static const float thresh = 0.775075; - const bool z_is_large = z > thresh; - const float large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - return large_z * float(z_is_large) + small_z * float(!z_is_large); -} - -// Normalized lower incomplete gamma function for small s: -float4 normalized_ligamma(const float4 s, const float4 z) -{ - // Requires: s < ~0.5 - // Returns: Approximate the normalized lower incomplete gamma function - // for s < 0.5. See normalized_ligamma_impl() for details. - const float4 s_inv = 1.0/s; - const float4 gamma_s_inv = 1.0/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float3 normalized_ligamma(const float3 s, const float3 z) -{ - // Float3 version: - const float3 s_inv = 1.0/s; - const float3 gamma_s_inv = 1.0/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float2 normalized_ligamma(const float2 s, const float2 z) -{ - // Float2 version: - const float2 s_inv = 1.0/s; - const float2 gamma_s_inv = 1.0/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float normalized_ligamma(const float s, const float z) -{ - // Float version: - const float s_inv = 1.0/s; - const float gamma_s_inv = 1.0/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -#endif // _SPECIAL_FUNCTIONS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/tex2Dantialias.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/tex2Dantialias.fxh deleted file mode 100644 index 65ea4f04b..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/tex2Dantialias.fxh +++ /dev/null @@ 
-1,1393 +0,0 @@ -#ifndef _TEX2DANTIALIAS_H -#define _TEX2DANTIALIAS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file provides antialiased and subpixel-aware tex2D lookups. -// Requires: All functions share these requirements: -// 1.) All requirements of gamma-management.h must be satisfied! -// 2.) pixel_to_tex_uv must be a 2x2 matrix that transforms pixe- -// space offsets to texture uv offsets. You can get this with: -// const float2 duv_dx = ddx(tex_uv); -// const float2 duv_dy = ddy(tex_uv); -// const float2x2 pixel_to_tex_uv = float2x2( -// duv_dx.x, duv_dy.x, -// duv_dx.y, duv_dy.y); -// This is left to the user in case the current Cg profile -// doesn't support ddx()/ddy(). Ideally, the user could find -// calculate a distorted tangent-space mapping analytically. 
-// If not, a simple flat mapping can be obtained with: -// const float2 xy_to_uv_scale = IN.output_size * -// IN.video_size/IN.texture_size; -// const float2x2 pixel_to_tex_uv = float2x2( -// xy_to_uv_scale.x, 0.0, -// 0.0, xy_to_uv_scale.y); -// Optional: To set basic AA settings, #define ANTIALIAS_OVERRIDE_BASICS and: -// 1.) Set an antialiasing level: -// static const float antialias_level = {0 (none), -// 1 (sample subpixels), 4, 5, 6, 7, 8, 12, 16, 20, 24} -// 2.) Set a filter type: -// static const float aa_filter = { -// 0 (Box, Separable), 1 (Box, Cylindrical), -// 2 (Tent, Separable), 3 (Tent, Cylindrical) -// 4 (Gaussian, Separable), 5 (Gaussian, Cylindrical) -// 6 (Cubic, Separable), 7 (Cubic, Cylindrical) -// 8 (Lanczos Sinc, Separable), -// 9 (Lanczos Jinc, Cylindrical)} -// If the input is unknown, a separable box filter is used. -// Note: Lanczos Jinc is terrible for sparse sampling, and -// using aa_axis_importance (see below) defeats the purpose. -// 3.) Mirror the sample pattern on odd frames? -// static const bool aa_temporal = {true, false] -// This helps rotational invariance but can look "fluttery." -// The user may #define ANTIALIAS_OVERRIDE_PARAMETERS to override -// (all of) the following default parameters with static or uniform -// constants (or an accessor function for subpixel offsets): -// 1.) Cubic parameters: -// static const float aa_cubic_c = 0.5; -// See http://www.imagemagick.org/Usage/filter/#mitchell -// 2.) Gaussian parameters: -// static const float aa_gauss_sigma = -// 0.5/aa_pixel_diameter; -// 3.) Set subpixel offsets. This requires an accessor function -// for compatibility with scalar runtime shader params. Return -// a float2 pixel offset in [-0.5, 0.5] for the red subpixel: -// float2 get_aa_subpixel_r_offset() -// The user may also #define ANTIALIAS_OVERRIDE_STATIC_CONSTANTS to -// override (all of) the following default static values. 
However, -// the file's structure requires them to be declared static const: -// 1.) static const float aa_lanczos_lobes = 3.0; -// 2.) static const float aa_gauss_support = 1.0/aa_pixel_diameter; -// Note the default tent/Gaussian support radii may appear -// arbitrary, but extensive testing found them nearly optimal -// for tough cases like strong distortion at low AA levels. -// (The Gaussian default is only best for practical gauss_sigma -// values; much larger gauss_sigmas ironically prefer slightly -// smaller support given sparse sampling, and vice versa.) -// 3.) static const float aa_tent_support = 1.0 / aa_pixel_diameter; -// 4.) static const float2 aa_xy_axis_importance: -// The sparse N-queens sampling grid interacts poorly with -// negative-lobed 2D filters. However, if aliasing is much -// stronger in one direction (e.g. horizontally with a phosphor -// mask), it can be useful to downplay sample offsets along the -// other axis. The support radius in each direction scales with -// aa_xy_axis_importance down to a minimum of 0.5 (box support), -// after which point only the offsets used for calculating -// weights continue to scale downward. This works as follows: -// If aa_xy_axis_importance = float2(1.0, 1.0/support_radius), -// the vertical support radius will drop to 1.0, and we'll just -// filter vertical offsets with the first filter lobe, while -// horizontal offsets go through the full multi-lobe filter. -// If aa_xy_axis_importance = float2(1.0, 0.0), the vertical -// support radius will drop to box support, and the vertical -// offsets will be ignored entirely (essentially giving us a -// box filter vertically). The former is potentially smoother -// (but less predictable) and the default behavior of Lanczos -// jinc, whereas the latter is sharper and the default behavior -// of cubics and Lanczos sinc. -// 5.) static const float aa_pixel_diameter: You can expand the -// pixel diameter to e.g. 
sqrt(2.0), which may be a better -// support range for cylindrical filters (they don't -// currently discard out-of-circle samples though). -// Finally, there are two miscellaneous options: -// 1.) If you want to antialias a manually tiled texture, you can -// #define ANTIALIAS_DISABLE_ANISOTROPIC to use tex2Dlod() to -// fix incompatibilities with anisotropic filtering. This is -// slower, and the Cg profile must support tex2Dlod(). -// 2.) If aa_cubic_c is a runtime uniform, you can #define -// _RUNTIME_ANTIALIAS_WEIGHTS to evaluate cubic weights once per -// fragment instead of at the usage site (which is used by -// default, because it enables static evaluation). -// Description: -// Each antialiased lookup follows these steps: -// 1.) Define a sample pattern of pixel offsets in the range of [-0.5, 0.5] -// pixels, spanning the diameter of a rectangular box filter. -// 2.) Scale these offsets by the support diameter of the user's chosen filter. -// 3.) Using these pixel offsets from the pixel center, compute the offsets to -// predefined subpixel locations. -// 4.) Compute filter weights based on subpixel offsets. -// Much of that can often be done at compile-time. At runtime: -// 1.) Project pixel-space offsets into uv-space with a matrix multiplication -// to get the uv offsets for each sample. Rectangular pixels have a -// diameter of 1.0. Circular pixels are not currently supported, but they -// might be better with a diameter of sqrt(2.0) to ensure there are no gaps -// between them. -// 2.) Load, weight, and sum samples. -// We use a sparse bilinear sampling grid, so there are two major implications: -// 1.) We can directly project the pixel-space support box into uv-space even -// if we're upsizing. This wouldn't be the case for nearest neighbor, -// where we'd have to expand the uv-space diameter to at least the support -// size to ensure sufficient filter support. 
In our case, this allows us -// to treat upsizing the same as downsizing and use static weighting. :) -// 2.) For decent results, negative-lobed filters must be computed based on -// separable weights, not radial distances, because the sparse sampling -// makes no guarantees about radial distributions. Even then, it's much -// better to set aa_xy_axis_importance to e.g. float2(1.0, 0.0) to use e.g. -// Lanczos2 horizontally and a box filter vertically. This is mainly due -// to the sparse N-queens sampling and a statistically enormous positive or -// negative covariance between horizontal and vertical weights. -// -// Design Decision Comments: -// "aa_temporal" mirrors the sample pattern on odd frames along the axis that -// keeps subpixel weights constant. This helps with rotational invariance, but -// it can cause distracting fluctuations, and horizontal and vertical edges -// will look the same. Using a different pattern on a shifted grid would -// exploit temporal AA better, but it would require a dynamic branch or a lot -// of conditional moves, so it's prohibitively slow for the minor benefit. - - -#include "helper-functions-and-macros.fxh" - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// #if !ANTIALIAS_OVERRIDE_BASICS -// // The following settings must be static constants: -// static const float antialias_level = 12.0; -// static const float aa_filter = 0.0; -// static const bool aa_temporal = false; -// #endif - -#ifndef ANTIALIAS_OVERRIDE_STATIC_CONSTANTS - // Users may override these parameters, but the file structure requires - // them to be static constants; see the descriptions above. 
- static const float aa_pixel_diameter = 1.0; - static const float aa_lanczos_lobes = 3.0; - static const float aa_gauss_support = 1.0 / aa_pixel_diameter; - static const float aa_tent_support = 1.0 / aa_pixel_diameter; - - // If we're using a negative-lobed filter, default to using it horizontally - // only, and use only the first lobe vertically or a box filter, over a - // correspondingly smaller range. This compensates for the sparse sampling - // grid's typically large positive/negative x/y covariance. - static const float2 aa_xy_axis_importance = macro_cond( - aa_filter < 5.5, - float2(1.0, 1.0), // Box, tent, Gaussian - macro_cond( - aa_filter < 8.5, - float2(1.0, 0.0), // Cubic and Lanczos sinc - macro_cond( - aa_filter < 9.5, - float2(1.0, 1.0/aa_lanczos_lobes), // Lanczos jinc - float2(1.0, 1.0) // Default to box - ) - ) - ); -#endif - -// #if !ANTIALIAS_OVERRIDE_PARAMETERS -// // Users may override these values with their own uniform or static consts. -// // Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell -// // 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality. -// // 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening. -// // 3.) C = 1.0/3.0 is the Mitchell-Netravali filter. -// // 4.) C = 0.0 is a soft spline filter. -// static const float aa_cubic_c = 0.5; -// static const float aa_gauss_sigma = 0.5 / aa_pixel_diameter; -// // Users may override the subpixel offset accessor function with their own. -// // A function is used for compatibility with scalar runtime shader params. 
-// float2 get_aa_subpixel_r_offset() -// { -// return float2(0.0, 0.0); -// } -// #endif - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "gamma-management.fxh" - - -////////////////////////////////// CONSTANTS ///////////////////////////////// - -static const float aa_box_support = 0.5; -static const float aa_cubic_support = 2.0; - - -//////////////////////////// GLOBAL NON-CONSTANTS //////////////////////////// - - -// We'll want to define these only once per fragment at most. -// Compute cubic coefficients on demand at runtime, and save them to global -// uniforms. The B parameter is computed from C, because "Keys cubics" -// with B = 1 - 2C are considered the highest quality. -static const float aa_cubic_b = 1.0 - 2.0*aa_cubic_c; -static const float cubic_branch1_x3_coeff = 12.0 - 9.0*aa_cubic_b - 6.0*aa_cubic_c; -static const float cubic_branch1_x2_coeff = -18.0 + 12.0*aa_cubic_b + 6.0*aa_cubic_c; -static const float cubic_branch1_x0_coeff = 6.0 - 2.0 * aa_cubic_b; -static const float cubic_branch2_x3_coeff = -aa_cubic_b - 6.0 * aa_cubic_c; -static const float cubic_branch2_x2_coeff = 6.0*aa_cubic_b + 30.0*aa_cubic_c; -static const float cubic_branch2_x1_coeff = -12.0*aa_cubic_b - 48.0*aa_cubic_c; -static const float cubic_branch2_x0_coeff = 8.0*aa_cubic_b + 24.0*aa_cubic_c; - - -/////////////////////////////////// HELPERS ////////////////////////////////// - -// In the RetroArch version, we can optionally implement aa_cubic_c as a uniform. -// I've disabled that for now because the asssociated mutable singleton mess was a -// pain to port. So for now, this function does absolutely nothing. Maybe I'll reintroduce -// the uniform implementation later, but for now the statics will do. -void assign_aa_cubic_constants() -{ - return; -} - -float4 get_subpixel_support_diam_and_final_axis_importance() -{ - // Statically select the base support radius: - static const float base_support_radius = - aa_filter < 1.5 ? 
aa_box_support : - aa_filter < 3.5 ? aa_tent_support : - aa_filter < 5.5 ? aa_gauss_support : - aa_filter < 7.5 ? aa_cubic_support : - aa_filter < 9.5 ? aa_lanczos_lobes : - aa_box_support; // Default to box - // Expand the filter support for subpixel filtering. - const float2 subpixel_support_radius_raw = - float2(base_support_radius, base_support_radius) + abs(get_aa_subpixel_r_offset()); - if(aa_filter < 1.5) - { - // Ignore aa_xy_axis_importance for box filtering. - const float2 subpixel_support_diam = - 2.0 * subpixel_support_radius_raw; - const float2 final_axis_importance = float2(1.0, 1.0); - return float4(subpixel_support_diam, final_axis_importance); - } - else - { - // Scale the support window by aa_xy_axis_importance, but don't narrow - // it further than box support. This allows decent vertical AA without - // messing up horizontal weights or using something silly like Lanczos4 - // horizontally with a huge vertical average over an 8-pixel radius. - const float2 subpixel_support_radius = max(float2(aa_box_support, aa_box_support), - subpixel_support_radius_raw * aa_xy_axis_importance); - // Adjust aa_xy_axis_importance to compensate for what's already done: - const float2 final_axis_importance = aa_xy_axis_importance * - subpixel_support_radius_raw/subpixel_support_radius; - const float2 subpixel_support_diam = 2.0 * subpixel_support_radius; - return float4(subpixel_support_diam, final_axis_importance); - } -} - - -/////////////////////////// FILTER WEIGHT FUNCTIONS ////////////////////////// - -float eval_box_filter(const float dist) -{ - return float(abs(dist) <= aa_box_support); -} - -float eval_separable_box_filter(const float2 offset) -{ - return float(all(bool2((abs(offset.x) <= aa_box_support), (abs(offset.y) <= aa_box_support)))); -} - -float eval_tent_filter(const float dist) -{ - return saturate((aa_tent_support - dist) / aa_tent_support); -} - -float eval_gaussian_filter(const float dist) -{ - return exp(-(dist*dist) / 
(2.0*aa_gauss_sigma*aa_gauss_sigma)); -} - -float eval_cubic_filter(const float dist) -{ - // Compute coefficients like assign_aa_cubic_constants(), but statically. - #if _RUNTIME_ANTIALIAS_WEIGHTS - // When runtime weights are used, these values are instead written to - // global uniforms at the beginning of each tex2Daa* call. - const float aa_cubic_b = 1.0 - 2.0*aa_cubic_c; - const float cubic_branch1_x3_coeff = 12.0 - 9.0*aa_cubic_b - 6.0*aa_cubic_c; - const float cubic_branch1_x2_coeff = -18.0 + 12.0*aa_cubic_b + 6.0*aa_cubic_c; - const float cubic_branch1_x0_coeff = 6.0 - 2.0 * aa_cubic_b; - const float cubic_branch2_x3_coeff = -aa_cubic_b - 6.0 * aa_cubic_c; - const float cubic_branch2_x2_coeff = 6.0*aa_cubic_b + 30.0*aa_cubic_c; - const float cubic_branch2_x1_coeff = -12.0*aa_cubic_b - 48.0*aa_cubic_c; - const float cubic_branch2_x0_coeff = 8.0*aa_cubic_b + 24.0*aa_cubic_c; - #endif - const float abs_dist = abs(dist); - // Compute the cubic based on the Horner's method formula in: - // http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf - return (abs_dist < 1.0 ? - (cubic_branch1_x3_coeff*abs_dist + - cubic_branch1_x2_coeff)*abs_dist*abs_dist + - cubic_branch1_x0_coeff : - abs_dist < 2.0 ? - ((cubic_branch2_x3_coeff*abs_dist + - cubic_branch2_x2_coeff)*abs_dist + - cubic_branch2_x1_coeff)*abs_dist + cubic_branch2_x0_coeff : - 0.0)/6.0; -} - -float eval_separable_cubic_filter(const float2 offset) -{ - // This is faster than using a specific float2 version: - return eval_cubic_filter(offset.x) * - eval_cubic_filter(offset.y); -} - -float2 eval_sinc_filter(const float2 offset) -{ - // It's faster to let the caller handle the zero case, or at least it - // was when I used macros and the shader preset took a full minute to load. 
- const float2 pi_offset = pi * offset; - return sin(pi_offset)/pi_offset; -} - -float eval_separable_lanczos_sinc_filter(const float2 offset_unsafe) -{ - // Note: For sparse sampling, you really need to pick an axis to use - // Lanczos along (e.g. set aa_xy_axis_importance = float2(1.0, 0.0)). - const float2 offset = FIX_ZERO(offset_unsafe); - const float2 xy_weights = eval_sinc_filter(offset) * - eval_sinc_filter(offset/aa_lanczos_lobes); - return xy_weights.x * xy_weights.y; -} - -float eval_jinc_filter_unorm(const float x) -{ - // This is a Jinc approximation for x in [0, 45). We'll use x in range - // [0, 4*pi) or so. There are faster/closer approximations based on - // piecewise cubics from [0, 45) and asymptotic approximations beyond that, - // but this has a maximum absolute error < 1/512, and it's simpler/faster - // for shaders...not that it's all that useful for sparse sampling anyway. - const float point3845_x = 0.38448566093564*x; - const float exp_term = exp(-(point3845_x*point3845_x)); - const float point8154_plus_x = 0.815362332840791 + x; - const float cos_term = cos(point8154_plus_x); - return ( - 0.0264727330997042*min(x, 6.83134964622778) + - 0.680823557250528*exp_term + - -0.0597255978950933*min(7.41043194481873, x)*cos_term / - (point8154_plus_x + 0.0646074538634482*(x*x) + - cos(x)*max(exp_term, cos(x) + cos_term)) - - 0.180837503591406); -} - -float eval_jinc_filter(const float dist) -{ - return eval_jinc_filter_unorm(pi * dist); -} - -float eval_lanczos_jinc_filter(const float dist) -{ - return eval_jinc_filter(dist) * eval_jinc_filter(dist/aa_lanczos_lobes); -} - - -float3 eval_unorm_rgb_weights(const float2 offset, - const float2 final_axis_importance) -{ - // Requires: 1.) final_axis_impportance must be computed according to - // get_subpixel_support_diam_and_final_axis_importance(). - // 2.) aa_filter must be a global constant. - // 3.) 
offset must be an xy pixel offset in the range: - // ([-subpixel_support_diameter.x/2, - // subpixel_support_diameter.x/2], - // [-subpixel_support_diameter.y/2, - // subpixel_support_diameter.y/2]) - // Returns: Sample weights at R/G/B destination subpixels for the - // given xy pixel offset. - const float2 offset_g = offset * final_axis_importance; - const float2 aa_r_offset = get_aa_subpixel_r_offset(); - const float2 offset_r = offset_g - aa_r_offset * final_axis_importance; - const float2 offset_b = offset_g + aa_r_offset * final_axis_importance; - // Statically select a filter: - if(aa_filter < 0.5) - { - return float3(eval_separable_box_filter(offset_r), - eval_separable_box_filter(offset_g), - eval_separable_box_filter(offset_b)); - } - else if(aa_filter < 1.5) - { - return float3(eval_box_filter(length(offset_r)), - eval_box_filter(length(offset_g)), - eval_box_filter(length(offset_b))); - } - else if(aa_filter < 2.5) - { - return float3( - eval_tent_filter(offset_r.x) * eval_tent_filter(offset_r.y), - eval_tent_filter(offset_g.x) * eval_tent_filter(offset_g.y), - eval_tent_filter(offset_b.x) * eval_tent_filter(offset_b.y)); - } - else if(aa_filter < 3.5) - { - return float3(eval_tent_filter(length(offset_r)), - eval_tent_filter(length(offset_g)), - eval_tent_filter(length(offset_b))); - } - else if(aa_filter < 4.5) - { - return float3( - eval_gaussian_filter(offset_r.x) * eval_gaussian_filter(offset_r.y), - eval_gaussian_filter(offset_g.x) * eval_gaussian_filter(offset_g.y), - eval_gaussian_filter(offset_b.x) * eval_gaussian_filter(offset_b.y)); - } - else if(aa_filter < 5.5) - { - return float3(eval_gaussian_filter(length(offset_r)), - eval_gaussian_filter(length(offset_g)), - eval_gaussian_filter(length(offset_b))); - } - else if(aa_filter < 6.5) - { - return float3( - eval_cubic_filter(offset_r.x) * eval_cubic_filter(offset_r.y), - eval_cubic_filter(offset_g.x) * eval_cubic_filter(offset_g.y), - eval_cubic_filter(offset_b.x) * 
eval_cubic_filter(offset_b.y)); - } - else if(aa_filter < 7.5) - { - return float3(eval_cubic_filter(length(offset_r)), - eval_cubic_filter(length(offset_g)), - eval_cubic_filter(length(offset_b))); - } - else if(aa_filter < 8.5) - { - return float3(eval_separable_lanczos_sinc_filter(offset_r), - eval_separable_lanczos_sinc_filter(offset_g), - eval_separable_lanczos_sinc_filter(offset_b)); - } - else if(aa_filter < 9.5) - { - return float3(eval_lanczos_jinc_filter(length(offset_r)), - eval_lanczos_jinc_filter(length(offset_g)), - eval_lanczos_jinc_filter(length(offset_b))); - } - else - { - // Default to a box, because Lanczos Jinc is so bad. ;) - return float3(eval_separable_box_filter(offset_r), - eval_separable_box_filter(offset_g), - eval_separable_box_filter(offset_b)); - } -} - - -////////////////////////////// HELPER FUNCTIONS ////////////////////////////// - -float4 tex2Daa_tiled_linearize(const sampler2D samp, const float2 s, const float input_gamma) -{ - // If we're manually tiling a texture, anisotropic filtering can get - // confused. This is one workaround: - #ifdef ANTIALIAS_DISABLE_ANISOTROPIC - // TODO: Use tex2Dlod_linearize with a calculated mip level. 
- return tex2Dlod_linearize(samp, float4(s, 0.0, 0.0), input_gamma); - #else - return tex2D_linearize(samp, s, input_gamma); - #endif -} - -float2 get_frame_sign(const float frame) -{ - if(aa_temporal) - { - // Mirror the sampling pattern for odd frames in a direction that - // lets us keep the same subpixel sample weights: - const float frame_odd = float(fmod(frame, 2.0) > 0.5); - const float2 aa_r_offset = get_aa_subpixel_r_offset(); - const float2 mirror = -float2(abs(aa_r_offset.x) < (FIX_ZERO(0.0)), abs(aa_r_offset.y) < (FIX_ZERO(0.0))); - return mirror; - } - else - { - return float2(1.0, 1.0); - } -} - - -///////////////////////// ANTIALIASED TEXTURE LOOKUPS //////////////////////// - -float3 tex2Daa_subpixel_weights_only(const sampler2D tex, - const float2 tex_uv, const float2x2 pixel_to_tex_uv, const float input_gamma) -{ - // This function is unlike the others: Just perform a single independent - // lookup for each subpixel. It may be very aliased. - const float2 aa_r_offset = get_aa_subpixel_r_offset(); - const float2 aa_r_offset_uv_offset = mul(pixel_to_tex_uv, aa_r_offset); - const float color_g = tex2D_linearize(tex, tex_uv, input_gamma).g; - const float color_r = tex2D_linearize(tex, tex_uv + aa_r_offset_uv_offset, input_gamma).r; - const float color_b = tex2D_linearize(tex, tex_uv - aa_r_offset_uv_offset, input_gamma).b; - return float3(color_r, color_g, color_b); -} - -// The tex2Daa* functions compile very slowly due to all the macros and -// compile-time math, so only include the ones we'll actually use! -float3 tex2Daa4x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use an RGMS4 pattern (4-queens): - // . . Q . : off =(-1.5, -1.5)/4 + (2.0, 0.0)/4 - // Q . . . : off =(-1.5, -1.5)/4 + (0.0, 1.0)/4 - // . . . Q : off =(-1.5, -1.5)/4 + (3.0, 2.0)/4 - // . Q . . 
: off =(-1.5, -1.5)/4 + (1.0, 3.0)/4 - // Static screenspace sample offsets (compute some implicitly): - static const float grid_size = 4.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(2.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(0.0, 1.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = w1.bgr; - const float3 w3 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/w_sum; - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = pixel_to_tex_uv * aa_pixel_diameter; - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * (w0 * sample0 + w1 * sample1 + - w2 * sample2 + w3 * sample3); -} - -float3 tex2Daa5x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 5-queens pattern: - // . Q . . . : off =(-2.0, -2.0)/5 + (1.0, 0.0)/5 - // . . . . Q : off =(-2.0, -2.0)/5 + (4.0, 1.0)/5 - // . . Q . . : off =(-2.0, -2.0)/5 + (2.0, 2.0)/5 - // Q . . . . : off =(-2.0, -2.0)/5 + (0.0, 3.0)/5 - // . . . Q . : off =(-2.0, -2.0)/5 + (3.0, 4.0)/5 - // Static screenspace sample offsets (compute some implicitly): - static const float grid_size = 5.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. 
Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(1.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(4.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(2.0, 2.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = w1.bgr; - const float3 w4 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 w_sum_inv = 1.0/(w0 + w1 + w2 + w3 + w4); - // Scale the pixel-space to texture offset matrix by the pixel diameter. - const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * (w0 * sample0 + w1 * sample1 + - w2 * sample2 + w3 * sample3 + w4 * sample4); - - // return (w0 + w1 + w2 + w3 + w4); -} - -float3 tex2Daa6x(const sampler2D tex, const float2 tex_uv, - const 
float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 6-queens pattern with a stronger horizontal - // than vertical slant: - // . . . . Q . : off =(-2.5, -2.5)/6 + (4.0, 0.0)/6 - // . . Q . . . : off =(-2.5, -2.5)/6 + (2.0, 1.0)/6 - // Q . . . . . : off =(-2.5, -2.5)/6 + (0.0, 2.0)/6 - // . . . . . Q : off =(-2.5, -2.5)/6 + (5.0, 3.0)/6 - // . . . Q . . : off =(-2.5, -2.5)/6 + (3.0, 4.0)/6 - // . Q . . . . : off =(-2.5, -2.5)/6 + (1.0, 5.0)/6 - // Static screenspace sample offsets (compute some implicitly): - static const float grid_size = 6.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(4.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(2.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(0.0, 2.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = w2.bgr; - const float3 w4 = w1.bgr; - const float3 w5 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * (w0 * sample0 + w1 * sample1 + w2 * sample2 + - w3 * sample3 + w4 * sample4 + w5 * sample5); -} - -float3 tex2Daa7x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 7-queens pattern with a queen in the center: - // . Q . . . . . : off =(-3.0, -3.0)/7 + (1.0, 0.0)/7 - // . . . . Q . . : off =(-3.0, -3.0)/7 + (4.0, 1.0)/7 - // Q . . . . . . : off =(-3.0, -3.0)/7 + (0.0, 2.0)/7 - // . . . Q . . . : off =(-3.0, -3.0)/7 + (3.0, 3.0)/7 - // . . . . . . Q : off =(-3.0, -3.0)/7 + (6.0, 4.0)/7 - // . . Q . . . . : off =(-3.0, -3.0)/7 + (2.0, 5.0)/7 - // . . . . . Q . 
: off =(-3.0, -3.0)/7 + (5.0, 6.0)/7 - static const float grid_size = 7.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(1.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(4.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(0.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(3.0, 3.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = w2.bgr; - const float3 w5 = w1.bgr; - const float3 w6 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2; - const float3 w_sum = half_sum + half_sum.bgr + w3; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6); -} - -float3 tex2Daa8x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 8-queens pattern. - // . . Q . . . . . : off =(-3.5, -3.5)/8 + (2.0, 0.0)/8 - // . . . . Q . . . : off =(-3.5, -3.5)/8 + (4.0, 1.0)/8 - // . Q . . . . . . : off =(-3.5, -3.5)/8 + (1.0, 2.0)/8 - // . . . . . . . Q : off =(-3.5, -3.5)/8 + (7.0, 3.0)/8 - // Q . . . . . . . : off =(-3.5, -3.5)/8 + (0.0, 4.0)/8 - // . . . . . . Q . : off =(-3.5, -3.5)/8 + (6.0, 5.0)/8 - // . . . Q . . . . : off =(-3.5, -3.5)/8 + (3.0, 6.0)/8 - // . . . . . Q . . 
: off =(-3.5, -3.5)/8 + (5.0, 7.0)/8 - static const float grid_size = 8.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(2.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(4.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(1.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(7.0, 3.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = w3.bgr; - const float3 w5 = w2.bgr; - const float3 w6 = w1.bgr; - const float3 w7 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2 + w3; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, and mirror on odd frames if directed: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - const float2 uv_offset3 = mul(true_pixel_to_tex_uv, xy_offset3 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset3, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset3, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample7 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7); -} - -float3 tex2Daa12x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 12-superqueens pattern where no 3 points are - // exactly collinear. - // . . . Q . . . . . . . . : off =(-5.5, -5.5)/12 + (3.0, 0.0)/12 - // . . . . . . . . . Q . . : off =(-5.5, -5.5)/12 + (9.0, 1.0)/12 - // . . . . . . Q . . . . . 
: off =(-5.5, -5.5)/12 + (6.0, 2.0)/12 - // . Q . . . . . . . . . . : off =(-5.5, -5.5)/12 + (1.0, 3.0)/12 - // . . . . . . . . . . . Q : off =(-5.5, -5.5)/12 + (11.0, 4.0)/12 - // . . . . Q . . . . . . . : off =(-5.5, -5.5)/12 + (4.0, 5.0)/12 - // . . . . . . . Q . . . . : off =(-5.5, -5.5)/12 + (7.0, 6.0)/12 - // Q . . . . . . . . . . . : off =(-5.5, -5.5)/12 + (0.0, 7.0)/12 - // . . . . . . . . . . Q . : off =(-5.5, -5.5)/12 + (10.0, 8.0)/12 - // . . . . . Q . . . . . . : off =(-5.5, -5.5)/12 + (5.0, 9.0)/12 - // . . Q . . . . . . . . . : off =(-5.5, -5.5)/12 + (2.0, 10.0)/12 - // . . . . . . . . Q . . . : off =(-5.5, -5.5)/12 + (8.0, 11.0)/12 - static const float grid_size = 12.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(3.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(9.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(6.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(1.0, 3.0) * xy_step; - const float2 xy_offset4 = xy_start_offset + float2(11.0, 4.0) * xy_step; - const float2 xy_offset5 = xy_start_offset + float2(4.0, 5.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. 
- const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = eval_unorm_rgb_weights(xy_offset4, final_axis_importance); - const float3 w5 = eval_unorm_rgb_weights(xy_offset5, final_axis_importance); - const float3 w6 = w5.bgr; - const float3 w7 = w4.bgr; - const float3 w8 = w3.bgr; - const float3 w9 = w2.bgr; - const float3 w10 = w1.bgr; - const float3 w11 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2 + w3 + w4 + w5; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/w_sum; - // Scale the pixel-space to texture offset matrix by the pixel diameter. - const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - const float2 uv_offset3 = mul(true_pixel_to_tex_uv, xy_offset3 * frame_sign); - const float2 uv_offset4 = mul(true_pixel_to_tex_uv, xy_offset4 * frame_sign); - const float2 uv_offset5 = mul(true_pixel_to_tex_uv, xy_offset5 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = 
tex2Daa_tiled_linearize(tex, tex_uv + uv_offset3, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset4, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset5, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset5, input_gamma).rgb; - const float3 sample7 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset4, input_gamma).rgb; - const float3 sample8 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset3, input_gamma).rgb; - const float3 sample9 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample10 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample11 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11); -} - -float3 tex2Daa16x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 16-superqueens pattern where no 3 points are - // exactly collinear. - // . . Q . . . . . . . . . . . . . : off =(-7.5, -7.5)/16 + (2.0, 0.0)/16 - // . . . . . . . . . Q . . . . . . : off =(-7.5, -7.5)/16 + (9.0, 1.0)/16 - // . . . . . . . . . . . . Q . . . : off =(-7.5, -7.5)/16 + (12.0, 2.0)/16 - // . . . . Q . . . . . . . . . . . : off =(-7.5, -7.5)/16 + (4.0, 3.0)/16 - // . . . . . . . . Q . . . . . . . : off =(-7.5, -7.5)/16 + (8.0, 4.0)/16 - // . . . . . . . . . . . . . . Q . : off =(-7.5, -7.5)/16 + (14.0, 5.0)/16 - // Q . . . . . . . . . . . . . . . : off =(-7.5, -7.5)/16 + (0.0, 6.0)/16 - // . . . . . . . . . . Q . . . . . : off =(-7.5, -7.5)/16 + (10.0, 7.0)/16 - // . . . . . Q . . . . . . . . . . 
: off =(-7.5, -7.5)/16 + (5.0, 8.0)/16 - // . . . . . . . . . . . . . . . Q : off =(-7.5, -7.5)/16 + (15.0, 9.0)/16 - // . Q . . . . . . . . . . . . . . : off =(-7.5, -7.5)/16 + (1.0, 10.0)/16 - // . . . . . . . Q . . . . . . . . : off =(-7.5, -7.5)/16 + (7.0, 11.0)/16 - // . . . . . . . . . . . Q . . . . : off =(-7.5, -7.5)/16 + (11.0, 12.0)/16 - // . . . Q . . . . . . . . . . . . : off =(-7.5, -7.5)/16 + (3.0, 13.0)/16 - // . . . . . . Q . . . . . . . . . : off =(-7.5, -7.5)/16 + (6.0, 14.0)/16 - // . . . . . . . . . . . . . Q . . : off =(-7.5, -7.5)/16 + (13.0, 15.0)/16 - static const float grid_size = 16.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(2.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(9.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(12.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(4.0, 3.0) * xy_step; - const float2 xy_offset4 = xy_start_offset + float2(8.0, 4.0) * xy_step; - const float2 xy_offset5 = xy_start_offset + float2(14.0, 5.0) * xy_step; - const float2 xy_offset6 = xy_start_offset + float2(0.0, 6.0) * xy_step; - const float2 xy_offset7 = xy_start_offset + float2(10.0, 7.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. 
- const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = eval_unorm_rgb_weights(xy_offset4, final_axis_importance); - const float3 w5 = eval_unorm_rgb_weights(xy_offset5, final_axis_importance); - const float3 w6 = eval_unorm_rgb_weights(xy_offset6, final_axis_importance); - const float3 w7 = eval_unorm_rgb_weights(xy_offset7, final_axis_importance); - const float3 w8 = w7.bgr; - const float3 w9 = w6.bgr; - const float3 w10 = w5.bgr; - const float3 w11 = w4.bgr; - const float3 w12 = w3.bgr; - const float3 w13 = w2.bgr; - const float3 w14 = w1.bgr; - const float3 w15 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - const float2 uv_offset3 = mul(true_pixel_to_tex_uv, xy_offset3 * frame_sign); - const float2 uv_offset4 = mul(true_pixel_to_tex_uv, xy_offset4 * frame_sign); - const float2 uv_offset5 = mul(true_pixel_to_tex_uv, xy_offset5 * frame_sign); - const float2 uv_offset6 = mul(true_pixel_to_tex_uv, xy_offset6 * frame_sign); - const float2 uv_offset7 = mul(true_pixel_to_tex_uv, xy_offset7 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset3, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset4, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset5, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset6, input_gamma).rgb; - const float3 sample7 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset7, input_gamma).rgb; - const float3 sample8 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset7, input_gamma).rgb; - const float3 sample9 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset6, input_gamma).rgb; - const float3 sample10 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset5, input_gamma).rgb; - const float3 sample11 = 
tex2Daa_tiled_linearize(tex, tex_uv - uv_offset4, input_gamma).rgb; - const float3 sample12 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset3, input_gamma).rgb; - const float3 sample13 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample14 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample15 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + - w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15); -} - -float3 tex2Daa20x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 20-superqueens pattern where no 3 points are - // exactly collinear and superqueens have a squared attack radius of 13. - // . . . . . . . Q . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (7.0, 0.0)/20 - // . . . . . . . . . . . . . . . . Q . . . : off =(-9.5, -9.5)/20 + (16.0, 1.0)/20 - // . . . . . . . . . . . Q . . . . . . . . : off =(-9.5, -9.5)/20 + (11.0, 2.0)/20 - // . Q . . . . . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (1.0, 3.0)/20 - // . . . . . Q . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (5.0, 4.0)/20 - // . . . . . . . . . . . . . . . Q . . . . : off =(-9.5, -9.5)/20 + (15.0, 5.0)/20 - // . . . . . . . . . . Q . . . . . . . . . : off =(-9.5, -9.5)/20 + (10.0, 6.0)/20 - // . . . . . . . . . . . . . . . . . . . Q : off =(-9.5, -9.5)/20 + (19.0, 7.0)/20 - // . . Q . . . . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (2.0, 8.0)/20 - // . . . . . . Q . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (6.0, 9.0)/20 - // . . . . . . . . . . . . . Q . . . . . . 
: off =(-9.5, -9.5)/20 + (13.0, 10.0)/20 - // . . . . . . . . . . . . . . . . . Q . . : off =(-9.5, -9.5)/20 + (17.0, 11.0)/20 - // Q . . . . . . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (0.0, 12.0)/20 - // . . . . . . . . . Q . . . . . . . . . . : off =(-9.5, -9.5)/20 + (9.0, 13.0)/20 - // . . . . Q . . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (4.0, 14.0)/20 - // . . . . . . . . . . . . . . Q . . . . . : off =(-9.5, -9.5)/20 + (14.0, 15.0)/20 - // . . . . . . . . . . . . . . . . . . Q . : off =(-9.5, -9.5)/20 + (18.0, 16.0)/20 - // . . . . . . . . Q . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (8.0, 17.0)/20 - // . . . Q . . . . . . . . . . . . . . . . : off =(-9.5, -9.5)/20 + (3.0, 18.0)/20 - // . . . . . . . . . . . . Q . . . . . . . : off =(-9.5, -9.5)/20 + (12.0, 19.0)/20 - static const float grid_size = 20.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. 
Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(7.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(16.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(11.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(1.0, 3.0) * xy_step; - const float2 xy_offset4 = xy_start_offset + float2(5.0, 4.0) * xy_step; - const float2 xy_offset5 = xy_start_offset + float2(15.0, 5.0) * xy_step; - const float2 xy_offset6 = xy_start_offset + float2(10.0, 6.0) * xy_step; - const float2 xy_offset7 = xy_start_offset + float2(19.0, 7.0) * xy_step; - const float2 xy_offset8 = xy_start_offset + float2(2.0, 8.0) * xy_step; - const float2 xy_offset9 = xy_start_offset + float2(6.0, 9.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = eval_unorm_rgb_weights(xy_offset4, final_axis_importance); - const float3 w5 = eval_unorm_rgb_weights(xy_offset5, final_axis_importance); - const float3 w6 = eval_unorm_rgb_weights(xy_offset6, final_axis_importance); - const float3 w7 = eval_unorm_rgb_weights(xy_offset7, final_axis_importance); - const float3 w8 = eval_unorm_rgb_weights(xy_offset8, final_axis_importance); - const float3 w9 = eval_unorm_rgb_weights(xy_offset9, final_axis_importance); - const float3 w10 = w9.bgr; - const float3 w11 = w8.bgr; - const float3 w12 = w7.bgr; - const float3 w13 = w6.bgr; - const float3 w14 = w5.bgr; - const float3 w15 = w4.bgr; - const float3 w16 = w3.bgr; - const float3 w17 = w2.bgr; - const float3 w18 = w1.bgr; - const float3 w19 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 
half_sum = w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. - const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - const float2 uv_offset3 = mul(true_pixel_to_tex_uv, xy_offset3 * frame_sign); - const float2 uv_offset4 = mul(true_pixel_to_tex_uv, xy_offset4 * frame_sign); - const float2 uv_offset5 = mul(true_pixel_to_tex_uv, xy_offset5 * frame_sign); - const float2 uv_offset6 = mul(true_pixel_to_tex_uv, xy_offset6 * frame_sign); - const float2 uv_offset7 = mul(true_pixel_to_tex_uv, xy_offset7 * frame_sign); - const float2 uv_offset8 = mul(true_pixel_to_tex_uv, xy_offset8 * frame_sign); - const float2 uv_offset9 = mul(true_pixel_to_tex_uv, xy_offset9 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset3, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset4, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset5, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset6, input_gamma).rgb; - const float3 sample7 = 
tex2Daa_tiled_linearize(tex, tex_uv + uv_offset7, input_gamma).rgb; - const float3 sample8 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset8, input_gamma).rgb; - const float3 sample9 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset9, input_gamma).rgb; - const float3 sample10 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset9, input_gamma).rgb; - const float3 sample11 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset8, input_gamma).rgb; - const float3 sample12 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset7, input_gamma).rgb; - const float3 sample13 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset6, input_gamma).rgb; - const float3 sample14 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset5, input_gamma).rgb; - const float3 sample15 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset4, input_gamma).rgb; - const float3 sample16 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset3, input_gamma).rgb; - const float3 sample17 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample18 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample19 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + - w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15 + - w16 * sample16 + w17 * sample17 + w18 * sample18 + w19 * sample19); -} - -float3 tex2Daa24x(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Use a diagonally symmetric 24-superqueens pattern where no 3 points are - // exactly collinear and superqueens have a squared attack radius of 13. - // . . . . . . Q . . . . . . . . . . . . . . . . . 
: off =(-11.5, -11.5)/24 + (6.0, 0.0)/24 - // . . . . . . . . . . . . . . . . Q . . . . . . . : off =(-11.5, -11.5)/24 + (16.0, 1.0)/24 - // . . . . . . . . . . Q . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (10.0, 2.0)/24 - // . . . . . . . . . . . . . . . . . . . . . Q . . : off =(-11.5, -11.5)/24 + (21.0, 3.0)/24 - // . . . . . Q . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (5.0, 4.0)/24 - // . . . . . . . . . . . . . . . Q . . . . . . . . : off =(-11.5, -11.5)/24 + (15.0, 5.0)/24 - // . Q . . . . . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (1.0, 6.0)/24 - // . . . . . . . . . . . Q . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (11.0, 7.0)/24 - // . . . . . . . . . . . . . . . . . . . Q . . . . : off =(-11.5, -11.5)/24 + (19.0, 8.0)/24 - // . . . . . . . . . . . . . . . . . . . . . . . Q : off =(-11.5, -11.5)/24 + (23.0, 9.0)/24 - // . . . Q . . . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (3.0, 10.0)/24 - // . . . . . . . . . . . . . . Q . . . . . . . . . : off =(-11.5, -11.5)/24 + (14.0, 11.0)/24 - // . . . . . . . . . Q . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (9.0, 12.0)/24 - // . . . . . . . . . . . . . . . . . . . . Q . . . : off =(-11.5, -11.5)/24 + (20.0, 13.0)/24 - // Q . . . . . . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (0.0, 14.0)/24 - // . . . . Q . . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (4.0, 15.0)/24 - // . . . . . . . . . . . . Q . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (12.0, 16.0)/24 - // . . . . . . . . . . . . . . . . . . . . . . Q . : off =(-11.5, -11.5)/24 + (22.0, 17.0)/24 - // . . . . . . . . Q . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (8.0, 18.0)/24 - // . . . . . . . . . . . . . . . . . . Q . . . . . : off =(-11.5, -11.5)/24 + (18.0, 19.0)/24 - // . . Q . . . . . . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (2.0, 20.0)/24 - // . . . . . . . . . . . . . Q . . . . . . . . . . 
: off =(-11.5, -11.5)/24 + (13.0, 21.0)/24 - // . . . . . . . Q . . . . . . . . . . . . . . . . : off =(-11.5, -11.5)/24 + (7.0, 22.0)/24 - // . . . . . . . . . . . . . . . . . Q . . . . . . : off =(-11.5, -11.5)/24 + (17.0, 23.0)/24 - static const float grid_size = 24.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample. Exploit diagonal symmetry: - const float2 xy_offset0 = xy_start_offset + float2(6.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(16.0, 1.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(10.0, 2.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(21.0, 3.0) * xy_step; - const float2 xy_offset4 = xy_start_offset + float2(5.0, 4.0) * xy_step; - const float2 xy_offset5 = xy_start_offset + float2(15.0, 5.0) * xy_step; - const float2 xy_offset6 = xy_start_offset + float2(1.0, 6.0) * xy_step; - const float2 xy_offset7 = xy_start_offset + float2(11.0, 7.0) * xy_step; - const float2 xy_offset8 = xy_start_offset + float2(19.0, 8.0) * xy_step; - const float2 xy_offset9 = xy_start_offset + float2(23.0, 9.0) * xy_step; - const float2 xy_offset10 = xy_start_offset + float2(3.0, 10.0) * xy_step; - const float2 xy_offset11 = xy_start_offset + float2(14.0, 11.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. 
- const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = eval_unorm_rgb_weights(xy_offset4, final_axis_importance); - const float3 w5 = eval_unorm_rgb_weights(xy_offset5, final_axis_importance); - const float3 w6 = eval_unorm_rgb_weights(xy_offset6, final_axis_importance); - const float3 w7 = eval_unorm_rgb_weights(xy_offset7, final_axis_importance); - const float3 w8 = eval_unorm_rgb_weights(xy_offset8, final_axis_importance); - const float3 w9 = eval_unorm_rgb_weights(xy_offset9, final_axis_importance); - const float3 w10 = eval_unorm_rgb_weights(xy_offset10, final_axis_importance); - const float3 w11 = eval_unorm_rgb_weights(xy_offset11, final_axis_importance); - const float3 w12 = w11.bgr; - const float3 w13 = w10.bgr; - const float3 w14 = w9.bgr; - const float3 w15 = w8.bgr; - const float3 w16 = w7.bgr; - const float3 w17 = w6.bgr; - const float3 w18 = w5.bgr; - const float3 w19 = w4.bgr; - const float3 w20 = w3.bgr; - const float3 w21 = w2.bgr; - const float3 w22 = w1.bgr; - const float3 w23 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2 + w3 + w4 + - w5 + w6 + w7 + w8 + w9 + w10 + w11; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, mirror on odd frames if directed, and exploit - // diagonal symmetry: - const float2 frame_sign = get_frame_sign(frame); - const float2 uv_offset0 = mul(true_pixel_to_tex_uv, xy_offset0 * frame_sign); - const float2 uv_offset1 = mul(true_pixel_to_tex_uv, xy_offset1 * frame_sign); - const float2 uv_offset2 = mul(true_pixel_to_tex_uv, xy_offset2 * frame_sign); - const float2 uv_offset3 = mul(true_pixel_to_tex_uv, xy_offset3 * frame_sign); - const float2 uv_offset4 = mul(true_pixel_to_tex_uv, xy_offset4 * frame_sign); - const float2 uv_offset5 = mul(true_pixel_to_tex_uv, xy_offset5 * frame_sign); - const float2 uv_offset6 = mul(true_pixel_to_tex_uv, xy_offset6 * frame_sign); - const float2 uv_offset7 = mul(true_pixel_to_tex_uv, xy_offset7 * frame_sign); - const float2 uv_offset8 = mul(true_pixel_to_tex_uv, xy_offset8 * frame_sign); - const float2 uv_offset9 = mul(true_pixel_to_tex_uv, xy_offset9 * frame_sign); - const float2 uv_offset10 = mul(true_pixel_to_tex_uv, xy_offset10 * frame_sign); - const float2 uv_offset11 = mul(true_pixel_to_tex_uv, xy_offset11 * frame_sign); - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset0, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset1, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset2, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset3, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset4, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset5, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset6, input_gamma).rgb; - const float3 sample7 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset7, input_gamma).rgb; 
- const float3 sample8 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset8, input_gamma).rgb; - const float3 sample9 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset9, input_gamma).rgb; - const float3 sample10 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset10, input_gamma).rgb; - const float3 sample11 = tex2Daa_tiled_linearize(tex, tex_uv + uv_offset11, input_gamma).rgb; - const float3 sample12 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset11, input_gamma).rgb; - const float3 sample13 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset10, input_gamma).rgb; - const float3 sample14 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset9, input_gamma).rgb; - const float3 sample15 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset8, input_gamma).rgb; - const float3 sample16 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset7, input_gamma).rgb; - const float3 sample17 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset6, input_gamma).rgb; - const float3 sample18 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset5, input_gamma).rgb; - const float3 sample19 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset4, input_gamma).rgb; - const float3 sample20 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset3, input_gamma).rgb; - const float3 sample21 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset2, input_gamma).rgb; - const float3 sample22 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset1, input_gamma).rgb; - const float3 sample23 = tex2Daa_tiled_linearize(tex, tex_uv - uv_offset0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + - w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15 + - w16 * sample16 + w17 * sample17 + w18 * sample18 + w19 * sample19 + - w20 * sample20 + w21 * sample21 + w22 * sample22 + w23 * 
sample23); -} - -float3 tex2Daa_debug_16x_regular(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Sample on a regular 4x4 grid. This is mainly for testing. - static const float grid_size = 4.0; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float2 xy_step = 1.0/grid_size * subpixel_support_diameter; - const float2 xy_start_offset = (0.5 - grid_size*0.5) * xy_step; - // Get the xy offset of each sample: - const float2 xy_offset0 = xy_start_offset + float2(0.0, 0.0) * xy_step; - const float2 xy_offset1 = xy_start_offset + float2(1.0, 0.0) * xy_step; - const float2 xy_offset2 = xy_start_offset + float2(2.0, 0.0) * xy_step; - const float2 xy_offset3 = xy_start_offset + float2(3.0, 0.0) * xy_step; - const float2 xy_offset4 = xy_start_offset + float2(0.0, 1.0) * xy_step; - const float2 xy_offset5 = xy_start_offset + float2(1.0, 1.0) * xy_step; - const float2 xy_offset6 = xy_start_offset + float2(2.0, 1.0) * xy_step; - const float2 xy_offset7 = xy_start_offset + float2(3.0, 1.0) * xy_step; - // Compute subpixel weights, and exploit diagonal symmetry for speed. - // (We can't exploit vertical or horizontal symmetry due to uncertain - // subpixel offsets. We could fix that by rotating xy offsets with the - // subpixel structure, but...no.) 
- const float3 w0 = eval_unorm_rgb_weights(xy_offset0, final_axis_importance); - const float3 w1 = eval_unorm_rgb_weights(xy_offset1, final_axis_importance); - const float3 w2 = eval_unorm_rgb_weights(xy_offset2, final_axis_importance); - const float3 w3 = eval_unorm_rgb_weights(xy_offset3, final_axis_importance); - const float3 w4 = eval_unorm_rgb_weights(xy_offset4, final_axis_importance); - const float3 w5 = eval_unorm_rgb_weights(xy_offset5, final_axis_importance); - const float3 w6 = eval_unorm_rgb_weights(xy_offset6, final_axis_importance); - const float3 w7 = eval_unorm_rgb_weights(xy_offset7, final_axis_importance); - const float3 w8 = w7.bgr; - const float3 w9 = w6.bgr; - const float3 w10 = w5.bgr; - const float3 w11 = w4.bgr; - const float3 w12 = w3.bgr; - const float3 w13 = w2.bgr; - const float3 w14 = w1.bgr; - const float3 w15 = w0.bgr; - // Get the weight sum to normalize the total to 1.0 later: - const float3 half_sum = w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7; - const float3 w_sum = half_sum + half_sum.bgr; - const float3 w_sum_inv = 1.0/(w_sum); - // Scale the pixel-space to texture offset matrix by the pixel diameter. 
- const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - // Get uv sample offsets, taking advantage of row alignment: - const float2 uv_step_x = mul(true_pixel_to_tex_uv, float2(xy_step.x, 0.0)); - const float2 uv_step_y = mul(true_pixel_to_tex_uv, float2(0.0, xy_step.y)); - const float2 uv_offset0 = -1.5 * (uv_step_x + uv_step_y); - const float2 sample0_uv = tex_uv + uv_offset0; - const float2 sample4_uv = sample0_uv + uv_step_y; - const float2 sample8_uv = sample0_uv + uv_step_y * 2.0; - const float2 sample12_uv = sample0_uv + uv_step_y * 3.0; - // Load samples, linearizing if necessary, etc.: - const float3 sample0 = tex2Daa_tiled_linearize(tex, sample0_uv, input_gamma).rgb; - const float3 sample1 = tex2Daa_tiled_linearize(tex, sample0_uv + uv_step_x, input_gamma).rgb; - const float3 sample2 = tex2Daa_tiled_linearize(tex, sample0_uv + uv_step_x * 2.0, input_gamma).rgb; - const float3 sample3 = tex2Daa_tiled_linearize(tex, sample0_uv + uv_step_x * 3.0, input_gamma).rgb; - const float3 sample4 = tex2Daa_tiled_linearize(tex, sample4_uv, input_gamma).rgb; - const float3 sample5 = tex2Daa_tiled_linearize(tex, sample4_uv + uv_step_x, input_gamma).rgb; - const float3 sample6 = tex2Daa_tiled_linearize(tex, sample4_uv + uv_step_x * 2.0, input_gamma).rgb; - const float3 sample7 = tex2Daa_tiled_linearize(tex, sample4_uv + uv_step_x * 3.0, input_gamma).rgb; - const float3 sample8 = tex2Daa_tiled_linearize(tex, sample8_uv, input_gamma).rgb; - const float3 sample9 = tex2Daa_tiled_linearize(tex, sample8_uv + uv_step_x, input_gamma).rgb; - const float3 sample10 = tex2Daa_tiled_linearize(tex, sample8_uv + uv_step_x * 2.0, input_gamma).rgb; - const float3 sample11 = tex2Daa_tiled_linearize(tex, sample8_uv + uv_step_x * 3.0, input_gamma).rgb; - const float3 sample12 = tex2Daa_tiled_linearize(tex, sample12_uv, input_gamma).rgb; - const float3 sample13 = tex2Daa_tiled_linearize(tex, sample12_uv + uv_step_x, input_gamma).rgb; - const float3 
sample14 = tex2Daa_tiled_linearize(tex, sample12_uv + uv_step_x * 2.0, input_gamma).rgb; - const float3 sample15 = tex2Daa_tiled_linearize(tex, sample12_uv + uv_step_x * 3.0, input_gamma).rgb; - // Sum weighted samples (weight sum must equal 1.0 for each channel): - return w_sum_inv * ( - w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + - w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15); -} - -float3 tex2Daa_debug_dynamic(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // This function is for testing only: Use an NxN grid with dynamic weights. - static const int grid_size = 8; - assign_aa_cubic_constants(); - const float4 ssd_fai = get_subpixel_support_diam_and_final_axis_importance(); - const float2 subpixel_support_diameter = ssd_fai.xy; - const float2 final_axis_importance = ssd_fai.zw; - const float grid_radius_in_samples = (float(grid_size) - 1.0)/2.0; - const float2 filter_space_offset_step = - subpixel_support_diameter / grid_size; - const float2 sample0_filter_space_offset = - -grid_radius_in_samples * filter_space_offset_step; - // Compute xy sample offsets and subpixel weights: - float3 weights[64]; // grid_size * grid_size - float3 weight_sum = float3(0.0, 0.0, 0.0); - for(int i = 0; i < grid_size; ++i) - { - for(int j = 0; j < grid_size; ++j) - { - // Weights based on xy distances: - const float2 offset = sample0_filter_space_offset + - float2(j, i) * filter_space_offset_step; - const float3 weight = eval_unorm_rgb_weights(offset, final_axis_importance); - weights[i*grid_size + j] = weight; - weight_sum += weight; - } - } - // Get uv offset vectors along x and y directions: - const float2x2 true_pixel_to_tex_uv = - float2x2((pixel_to_tex_uv * aa_pixel_diameter)); - const float2 uv_offset_step_x = mul(true_pixel_to_tex_uv, - 
float2(filter_space_offset_step.x, 0.0)); - const float2 uv_offset_step_y = mul(true_pixel_to_tex_uv, - float2(0.0, filter_space_offset_step.y)); - // Get a starting sample location: - const float2 sample0_uv_offset = -grid_radius_in_samples * - (uv_offset_step_x + uv_offset_step_y); - const float2 sample0_uv = tex_uv + sample0_uv_offset; - // Load, weight, and sum [linearized] samples: - float3 sum = float3(0.0, 0.0, 0.0); - const float3 weight_sum_inv = 1.0/weight_sum; - for(int i = 0; i < grid_size; ++i) - { - const float2 row_i_first_sample_uv = - sample0_uv + i * uv_offset_step_y; - for(int j = 0; j < grid_size; ++j) - { - const float2 sample_uv = - row_i_first_sample_uv + j * uv_offset_step_x; - sum += weights[i*grid_size + j] * - tex2Daa_tiled_linearize(tex, sample_uv, input_gamma).rgb; - } - } - return sum * weight_sum_inv; -} - - -/////////////////////// ANTIALIASING CODEPATH SELECTION ////////////////////// - -float3 tex2Daa(const sampler2D tex, const float2 tex_uv, - const float2x2 pixel_to_tex_uv, const float frame, const float input_gamma) -{ - // Statically switch between antialiasing modes/levels: - return (antialias_level < 0.5) ? tex2D_linearize(tex, tex_uv, input_gamma).rgb : - (antialias_level < 3.5) ? tex2Daa_subpixel_weights_only( - tex, tex_uv, pixel_to_tex_uv, input_gamma) : - (antialias_level < 4.5) ? tex2Daa4x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 5.5) ? tex2Daa5x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 6.5) ? tex2Daa6x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 7.5) ? tex2Daa7x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 11.5) ? tex2Daa8x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 15.5) ? tex2Daa12x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 19.5) ? tex2Daa16x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 23.5) ? 
tex2Daa20x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 253.5) ? tex2Daa24x(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - (antialias_level < 254.5) ? tex2Daa_debug_16x_regular( - tex, tex_uv, pixel_to_tex_uv, frame, input_gamma) : - tex2Daa_debug_dynamic(tex, tex_uv, pixel_to_tex_uv, frame, input_gamma); -} - - -#endif // _TEX2DANTIALIAS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/lib/user-settings.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/lib/user-settings.fxh deleted file mode 100644 index 90ffda8f9..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/lib/user-settings.fxh +++ /dev/null @@ -1,428 +0,0 @@ -#ifndef _USER_SETTINGS_H -#define _USER_SETTINGS_H - -///////////////////////////// DRIVER CAPABILITIES //////////////////////////// - -// The Cg compiler uses different "profiles" with different capabilities. -// This shader requires a Cg compilation profile >= arbfp1, but a few options -// require higher profiles like fp30 or fp40. The shader can't detect profile -// or driver capabilities, so instead you must comment or uncomment the lines -// below with "//" before "#define." Disable an option if you get compilation -// errors resembling those listed. Generally speaking, all of these options -// will run on nVidia cards, but only _DRIVERS_ALLOW_TEX2DBIAS (if that) is -// likely to run on ATI/AMD, due to the Cg compiler's profile limitations. - -// Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1. -// Among other things, derivatives help us fix anisotropic filtering artifacts -// with curved manually tiled phosphor mask coords. 
Related errors: -// error C3004: function "float2 ddx(float2);" not supported in this profile -// error C3004: function "float2 ddy(float2);" not supported in this profile -#ifndef _DRIVERS_ALLOW_DERIVATIVES - #define _DRIVERS_ALLOW_DERIVATIVES 0 -#endif - -// Fine derivatives: Unsupported on older ATI cards. -// Fine derivatives enable 2x2 fragment block communication, letting us perform -// fast single-pass blur operations. If your card uses coarse derivatives and -// these are enabled, blurs could look broken. Derivatives are a prerequisite. -#if _DRIVERS_ALLOW_DERIVATIVES - #define _DRIVERS_ALLOW_FINE_DERIVATIVES -#endif - -// Dynamic looping: Requires an fp30 or newer profile. -// This makes phosphor mask resampling faster in some cases. Related errors: -// error C5013: profile does not support "for" statements and "for" could not -// be unrolled -#ifndef _DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define _DRIVERS_ALLOW_DYNAMIC_BRANCHES 0 -#endif - -// Without _DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops. -// Using one static loop avoids overhead if the user is right, but if the user -// is wrong (loops are allowed), breaking a loop into if-blocked pieces with a -// binary search can potentially save some iterations. However, it may fail: -// error C6001: Temporary register limit of 32 exceeded; 35 registers -// needed to compile program -#ifndef _ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS - #define _ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS 0 -#endif - -// tex2Dlod: Requires an fp40 or newer profile. This can be used to disable -// anisotropic filtering, thereby fixing related artifacts. Related errors: -// error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in -// this profile -// #ifndef _DRIVERS_ALLOW_TEX2DLOD -// #define _DRIVERS_ALLOW_TEX2DLOD 1 -// #endif - -// tex2Dbias: Requires an fp30 or newer profile. This can be used to alleviate -// artifacts from anisotropic filtering and mipmapping. 
Related errors: -// error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported -// in this profile -// #ifndef _DRIVERS_ALLOW_TEX2DBIAS -// #define _DRIVERS_ALLOW_TEX2DBIAS 0 -// #endif - -// Integrated graphics compatibility: Integrated graphics like Intel HD 4000 -// impose stricter limitations on register counts and instructions. Enable -// _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or: -// error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed -// to compile program. -// Enabling integrated graphics compatibility mode will automatically disable: -// 1.) _PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer. -// (This may be reenabled in a later release.) -// 2.) _RUNTIME_GEOMETRY_MODE -// 3.) The high-quality 4x4 Gaussian resize for the bloom approximation -#ifndef _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE - #define _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE 0 -#endif - - -//////////////////////////// USER CODEPATH OPTIONS /////////////////////////// - -// To disable a #define option, turn its line into a comment with "//." - -// RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications): -// Enable runtime shader parameters in the Retroarch (etc.) GUI? They override -// many of the options in this file and allow real-time tuning, but many of -// them are slower. Disabling them and using this text file will boost FPS. -#ifndef _RUNTIME_SHADER_PARAMS_ENABLE - #define _RUNTIME_SHADER_PARAMS_ENABLE 1 -#endif -// Specify the phosphor bloom sigma at runtime? This option is 10% slower, but -// it's the only way to do a wide-enough full bloom with a runtime dot pitch. -#ifndef _RUNTIME_PHOSPHOR_BLOOM_SIGMA - #define _RUNTIME_PHOSPHOR_BLOOM_SIGMA 1 -#endif -// Specify antialiasing weight parameters at runtime? (Costs ~20% with cubics) -#ifndef _RUNTIME_ANTIALIAS_WEIGHTS - #define _RUNTIME_ANTIALIAS_WEIGHTS 1 -#endif -// Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!) 
-#ifndef _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - #define _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS 0 -#endif -// Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader -// parameters? This will require more math or dynamic branching. -#ifndef _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #define _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE 1 -#endif -// Specify the tilt at runtime? This makes things about 3% slower. -// akgunter: -// This is used in crt-royale-geometry-aa-last-pass.fxh. -// I've hard-coded it to 1 and hidden it from the UI in the ReShade version because -// I don't know a good way to port that logic. If anyone ever does figure that -// out, we can uncomment and port that logic and then unhide this definition. -#define _RUNTIME_GEOMETRY_TILT 1 - -// Specify the geometry mode at runtime? -#ifndef _RUNTIME_GEOMETRY_MODE - #define _RUNTIME_GEOMETRY_MODE 1 -#endif -// Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and -// mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without -// dynamic branches? This is cheap if mask_resize_viewport_scale is small. -// #ifndef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT -// #define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT 1 -// #endif - -// PHOSPHOR MASK: -// Choose between a 64x64 or 512x512 source for the phosphor mask -// Mainly affects Sample Mode 1 -// #ifndef USE_LARGE_PHOSPHOR_MASK -// #define USE_LARGE_PHOSPHOR_MASK 1 -// #endif - -// Manually resize the phosphor mask for best results (slower)? Disabling this -// removes the option to do so, but it may be faster without dynamic branches. -#ifndef _PHOSPHOR_MASK_MANUALLY_RESIZE - #define _PHOSPHOR_MASK_MANUALLY_RESIZE 1 -#endif -// If we sinc-resize the mask, should we Lanczos-window it (slower but better)? -// #ifndef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW -// #define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW 1 -// #endif -// Larger blurs are expensive, but we need them to blur larger triads. 
We can -// detect the right blur if the triad size is static or our profile allows -// dynamic branches, but otherwise we use the largest blur the user indicates -// they might need: - -#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS 1 -#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS 2 -#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS 3 -#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS 4 - -#if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA - #ifndef PHOSPHOR_BLOOM_TRIAD_SIZE_MODE - #define PHOSPHOR_BLOOM_TRIAD_SIZE_MODE _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS // [0 - 4] - #endif -#endif - -// Here's a helpful chart: -// MaxTriadSize BlurSize MinTriadCountsByResolution -// 3.0 9.0 480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect -// 6.0 17.0 240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect -// 9.0 25.0 160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect -// 12.0 31.0 120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect -// 18.0 43.0 80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - -/////////////////////////////// USER PARAMETERS ////////////////////////////// - -// Note: Many of these static parameters are overridden by runtime shader -// parameters when those are enabled. However, many others are static codepath -// options that were cleaner or more convert to code as static constants. - -// GAMMA: -static const float crt_gamma_static = 2.5; // range [1, 5] -static const float lcd_gamma_static = 2.2; // range [1, 5] - -// LEVELS MANAGEMENT: -// Control the final multiplicative image contrast: -static const float levels_contrast_static = 1.0; // range [0, 4) -// We auto-dim to avoid clipping between passes and restore brightness -// later. Control the dim factor here: Lower values clip less but crush -// blacks more (static only for now). 
-static const float levels_autodim_temp = 0.5; // range (0, 1] default is 0.5 but that was unnecessarily dark for me, so I set it to 1.0 - -// HALATION/DIFFUSION/BLOOM: -// Halation weight: How much energy should be lost to electrons bounding -// around under the CRT glass and exciting random phosphors? -static const float halation_weight_static = 0.0; // range [0, 1] -// Refractive diffusion weight: How much light should spread/diffuse from -// refracting through the CRT glass? -static const float diffusion_weight_static = 0.075; // range [0, 1] -// Underestimate brightness: Bright areas bloom more, but we can base the -// bloom brightpass on a lower brightness to sharpen phosphors, or a higher -// brightness to soften them. Low values clip, but >= 0.8 looks okay. -static const float bloom_underestimate_levels_static = 0.8; // range [0, 5] -// Blur all colors more than necessary for a softer phosphor bloom? -static const float bloom_excess_static = 0.0; // range [0, 1] -// The BLOOM_APPROX pass approximates a phosphor blur early on with a small -// blurred resize of the input (convergence offsets are applied as well). -// There are three filter options (static option only for now): -// 0.) Bilinear resize: A fast, close approximation to a 4x4 resize -// if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane -// and gaussian_beam_max_sigma is low. -// 1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring, -// always uses a static sigma regardless of gaussian_beam_max_sigma or -// mask_num_triads_across. -// 2.) True 4x4 Gaussian resize: Slowest, technically correct. -// These options are more pronounced for the fast, unbloomed shader version. 
-#ifndef RADEON_FIX - #define RADEON_FIX 0 -#endif - -#if !RADEON_FIX - static const float bloom_approx_filter_static = 2.0; -#else - static const float bloom_approx_filter_static = 1.0; -#endif - -// ELECTRON BEAM SCANLINE DISTRIBUTION: -// How many scanlines should contribute light to each pixel? Using more -// scanlines is slower (especially for a generalized Gaussian) but less -// distorted with larger beam sigmas (especially for a pure Gaussian). The -// max_beam_sigma at which the closest unused weight is guaranteed < -// 1.0/255.0 (for a 3x antialiased pure Gaussian) is: -// 2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized -// 3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized -// 4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized -// 5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized -// 6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized -static const float beam_num_scanlines = 3.0; // range [2, 6] -// A generalized Gaussian beam varies shape with color too, now just width. -// It's slower but more flexible (static option only for now). -static const bool beam_generalized_gaussian = true; -// What kind of scanline antialiasing do you want? -// 0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral -// Integrals are slow (especially for generalized Gaussians) and rarely any -// better than 3x antialiasing (static option only for now). -static const float beam_antialias_level = 1.0; // range [0, 2] -// Min/max standard deviations for scanline beams: Higher values widen and -// soften scanlines. Depending on other options, low min sigmas can alias. 
-static const float gaussian_beam_min_sigma_static = 0.02; // range (0, 1] -static const float gaussian_beam_max_sigma_static = 0.3; // range (0, 1] -// Beam width varies as a function of color: A power function (0) is more -// configurable, but a spherical function (1) gives the widest beam -// variability without aliasing (static option only for now). -static const float beam_spot_shape_function = 0.0; -// Spot shape power: Powers <= 1 give smoother spot shapes but lower -// sharpness. Powers >= 1.0 are awful unless mix/max sigmas are close. -static const float gaussian_beam_spot_power_static = 1.0/3.0; // range (0, 16] -// Generalized Gaussian max shape parameters: Higher values give flatter -// scanline plateaus and steeper dropoffs, simultaneously widening and -// sharpening scanlines at the cost of aliasing. 2.0 is pure Gaussian, and -// values > ~40.0 cause artifacts with integrals. -static const float gaussian_beam_min_shape_static = 2.0; // range [2, 32] -static const float gaussian_beam_max_shape_static = 4.0; // range [2, 32] -// Generalized Gaussian shape power: Affects how quickly the distribution -// changes shape from Gaussian to steep/plateaued as color increases from 0 -// to 1.0. Higher powers appear softer for most colors, and lower powers -// appear sharper for most colors. -static const float gaussian_beam_shape_power_static = 1.0/4.0; // range (0, 16] -// What filter should be used to sample scanlines horizontally? -// 0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp) -static const float beam_horiz_filter_static = 0.0; -// Standard deviation for horizontal Gaussian resampling: -static const float beam_horiz_sigma_static = 0.35; // range (0, 2/3] -// Do horizontal scanline sampling in linear RGB (correct light mixing), -// gamma-encoded RGB (darker, hard spot shape, may better match bandwidth- -// limiting circuitry in some CRT's), or a weighted avg.? 
-static const float beam_horiz_linear_rgb_weight_static = 1.0; // range [0, 1] -// Simulate scanline misconvergence? This needs 3x horizontal texture -// samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in -// later passes (static option only for now). -static const bool beam_misconvergence = true; -// Convergence offsets in x/y directions for R/G/B scanline beams in units -// of scanlines. Positive offsets go right/down; ranges [-2, 2] -static const float2 convergence_offsets_r_static = float2(0.1, 0.2); -static const float2 convergence_offsets_g_static = float2(0.3, 0.4); -static const float2 convergence_offsets_b_static = float2(0.5, 0.6); -// Detect interlacing (static option only for now)? -static const bool interlace_detect = true; -// Assume 1080-line sources are interlaced? -static const bool interlace_1080i_static = false; -// For interlaced sources, assume TFF (top-field first) or BFF order? -// (Whether this matters depends on the nature of the interlaced input.) -static const bool interlace_back_field_first_static = false; - -// ANTIALIASING: -// What AA level do you want for curvature/overscan/subpixels? Options: -// 0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x -// (Static option only for now) -#ifndef antialias_level - #define antialias_level 0.0 -#endif -// static const float aa_level = 12.0; // range [0, 24] -// static const float aa_level = 0.0; // range [0, 24] -// What antialiasing filter do you want (static option only)? 
Options: -// 0: Box (separable), 1: Box (cylindrical), -// 2: Tent (separable), 3: Tent (cylindrical), -// 4: Gaussian (separable), 5: Gaussian (cylindrical), -// 6: Cubic* (separable), 7: Cubic* (cylindrical, poor) -// 8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor) -// * = Especially slow with _RUNTIME_ANTIALIAS_WEIGHTS -#ifndef antialias_filter - #define antialias_filter 6 -#endif -static const float aa_filter = antialias_filter; // range [0, 9] -// Flip the sample grid on odd/even frames (static option only for now)? -#ifndef antialias_temporal - #define antialias_temporal false -#endif -static const bool aa_temporal = antialias_temporal; -// Use RGB subpixel offsets for antialiasing? The pixel is at green, and -// the blue offset is the negative r offset; range [0, 0.5] -static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0); -// Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell -// 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality. -// 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening. -// 3.) C = 1.0/3.0 is the Mitchell-Netravali filter. -// 4.) C = 0.0 is a soft spline filter. -static const float aa_cubic_c_static = 0.5; // range [0, 4] -// Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter. -static const float aa_gauss_sigma_static = 0.5; // range [0.0625, 1.0] - -// PHOSPHOR MASK: -// Mask type: 0 = aperture grille, 1 = slot mask, 2 = shadow mask -// 3 = lowres grille, 4 = lowres slot, 5 = lowres shadow -static const float mask_type_static = 4.0; // range [0, 5] -// We can sample the mask three ways. Pick 2/3 from: Pretty/Fast/Flexible. -// 0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible). -// This requires _PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined. -// 1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible). This -// is halfway decent with LUT mipmapping but atrocious without it. -// 2.) 
Tile it without resizing at a 1:1 texel:pixel ratio for flat coords -// (pretty/fast/inflexible). Each input LUT has a fixed dot pitch. -// This mode reuses the same masks, so triads will be enormous unless -// you change the mask LUT filenames in your .cgp file. -static const float mask_sample_mode_static = 0.0; // range [0, 2] -// Prefer setting the triad size (0.0) or number on the screen (1.0)? -// If _RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size -// will always be used to calculate the full bloom sigma statically. -static const float mask_size_param_static = 0.0; // range [0, 1] -// Specify the phosphor triad size, in pixels. Each tile (usually with 8 -// triads) will be rounded to the nearest integer tile size and clamped to -// obey minimum size constraints (imposed to reduce downsize taps) and -// maximum size constraints (imposed to have a sane MASK_RESIZE FBO size). -// To increase the size limit, double the viewport-relative scales for the -// two MASK_RESIZE passes in crt-royale.cgp and user-cgp-contants.h. -// range [1, mask_texture_small_size/mask_triads_per_tile] -static const float mask_triad_width_static = 24.0 / 8.0; -// If mask_size_param is 1.0/true, we'll go by this instead (the -// final size will be rounded and constrained as above); default 480.0 -static const float mask_num_triads_across_static = 480.0; -// How many lobes should the sinc/Lanczos resizer use? More lobes require -// more samples and avoid moire a bit better, but some is unavoidable -// depending on the destination size (static option for now). -static const float mask_sinc_lobes = 3.0; // range [2, 4] -// The mask is resized using a variable number of taps in each dimension, -// but some Cg profiles always fetch a constant number of taps no matter -// what (no dynamic branching). We can limit the maximum number of taps if -// we statically limit the minimum phosphor triad size. 
Larger values are -// faster, but the limit IS enforced (static option only, forever); -// range [1, mask_texture_small_size/mask_triads_per_tile] -// TODO: Make this 1.0 and compensate with smarter sampling! -static const float mask_min_allowed_triad_size = 2.0; - -// GEOMETRY: -// Geometry mode: -// 0: Off (default), 1: Spherical mapping (like cgwg's), -// 2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron -static const float geom_mode_static = 0.0; // range [0, 3] -// Radius of curvature: Measured in units of your viewport's diagonal size. -static const float geom_radius_static = 2.0; // range [1/(2*pi), 1024] -// View dist is the distance from the player to their physical screen, in -// units of the viewport's diagonal size. It controls the field of view. -static const float geom_view_dist_static = 2.0; // range [0.5, 1024] -// Tilt angle in radians (clockwise around up and right vectors): -static const float2 geom_tilt_angle_static = float2(0.0, 0.0); // range [-pi, pi] -// Aspect ratio: When the true viewport size is unknown, this value is used -// to help convert between the phosphor triad size and count, along with -// the mask_resize_viewport_scale constant from user-cgp-constants.h. Set -// this equal to Retroarch's display aspect ratio (DAR) for best results; -// range [1, geom_max_aspect_ratio from user-cgp-constants.h]; -// default (256/224)*(54/47) = 1.313069909 (see below) -static const float geom_aspect_ratio_static = 1.313069909; -// Before getting into overscan, here's some general aspect ratio info: -// - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting -// - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR -// - PAR = pixel aspect ratio = DAR / SAR; holds regardless of cropping -// Geometry processing has to "undo" the screen-space 2D DAR to calculate -// 3D view vectors, then reapplies the aspect ratio to the simulated CRT in -// uv-space. 
To ensure the source SAR is intended for a ~4:3 DAR, either: -// a.) Enable Retroarch's "Crop Overscan" -// b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0) -// Real consoles use horizontal black padding in the signal, but emulators -// often crop this without cropping the vertical padding; a 256x224 [S]NES -// frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not. -// The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun: -// http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50 -// http://forums.nesdev.com/viewtopic.php?p=24815#p24815 -// For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR -// without doing a. or b., but horizontal image borders will be tighter -// than vertical ones, messing up curvature and overscan. Fixing the -// padding first corrects this. -// Overscan: Amount to "zoom in" before cropping. You can zoom uniformly -// or adjust x/y independently to e.g. readd horizontal padding, as noted -// above: Values < 1.0 zoom out; range (0, inf) -static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0) -// Compute a proper pixel-space to texture-space matrix even without ddx()/ -// ddy()? This is ~8.5% slower but improves antialiasing/subpixel filtering -// with strong curvature (static option only for now). -static const bool geom_force_correct_tangent_matrix = true; - -// BORDERS: -// Rounded border size in texture uv coords: -static const float border_size_static = 0.015; // range [0, 0.5] -// Border darkness: Moderate values darken the border smoothly, and high -// values make the image very dark just inside the border: -static const float border_darkness_static = 2.0; // range [0, inf) -// Border compression: High numbers compress border transitions, narrowing -// the dark border area. 
-static const float border_compress_static = 2.5; // range [1, inf) - -// TODO: Nuke this -#define mask_size_xy float2(512, 512) - -#endif // _USER_SETTINGS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/bloom.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/bloom.fxh deleted file mode 100644 index 8cc4832f5..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/bloom.fxh +++ /dev/null @@ -1,149 +0,0 @@ -#ifndef _BLOOM_H -#define _BLOOM_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - -#include "../lib/user-settings.fxh" -#include "../lib/derived-settings-and-constants.fxh" -#include "../lib/bind-shader-params.fxh" -#include "../lib/gamma-management.fxh" -#include "../lib/downsampling-functions.fxh" -#include "../lib/blur-functions.fxh" -#include "../lib/bloom-functions.fxh" - -#include "shared-objects.fxh" - - -void approximateBloomVertPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - const float2 delta_uv = blur_radius * float2(0.0, rcp(TEX_BEAMCONVERGENCE_HEIGHT)); - - color = float4(opaque_linear_downsample( - samplerBeamConvergence, texcoord, - uint((bloomapprox_downsizing_factor - 1)/2), - delta_uv - ), 1); -} - -void approximateBloomHorizPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - const float2 delta_uv = blur_radius * float2(rcp(TEX_BEAMCONVERGENCE_WIDTH), 0.0); - - color = float4(opaque_linear_downsample( - samplerBloomApproxVert, texcoord, - uint((bloomapprox_downsizing_factor - 1)/2), - delta_uv - ), 1); -} - - -void bloomHorizontalVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float bloom_sigma_runtime : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - - bloom_sigma_runtime = get_min_sigma_to_blur_triad(calc_triad_size().x, bloom_diff_thresh_); -} - -void bloomHorizontalPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float bloom_sigma_runtime : TEXCOORD1, - - out float4 color : SV_Target -) { - const float2 bloom_dxdy = float2(rcp(TEX_BLOOMVERTICAL_WIDTH), 0); - - // Blur the vertically blurred brightpass horizontally by 9/17/25/43x: - const float bloom_sigma = 
get_final_bloom_sigma(bloom_sigma_runtime); - const float3 blurred_brightpass = tex2DblurNfast(samplerBloomVertical, - texcoord, bloom_dxdy, bloom_sigma, get_intermediate_gamma()); - - // Sample the masked scanlines. Alpha contains the auto-dim factor: - const float3 intensity_dim = tex2D_linearize(samplerMaskedScanlines, texcoord, get_intermediate_gamma()).rgb; - const float auto_dim_factor = levels_autodim_temp; - const float undim_factor = 1.0/auto_dim_factor; - - // Calculate the mask dimpass, add it to the blurred brightpass, and - // undim (from scanline auto-dim) and amplify (from mask dim) the result: - const float mask_amplify = get_mask_amplify(); - const float3 brightpass = tex2D_linearize(samplerBrightpass, texcoord, get_intermediate_gamma()).rgb; - const float3 dimpass = intensity_dim - brightpass; - const float3 phosphor_bloom = (dimpass + blurred_brightpass) * - mask_amplify * undim_factor * levels_contrast; - - // Sample the halation texture, and let some light bleed into refractive - // diffusion. Conceptually this occurs before the phosphor bloom, but - // adding it in earlier passes causes black crush in the diffusion colors. 
- const float3 raw_diffusion_color = tex2D_linearize(samplerBlurHorizontal, texcoord, get_intermediate_gamma()).rgb; - const float3 raw_halation_color = dot(raw_diffusion_color, float3(1, 1, 1)) / 3.0; - const float3 diffusion_color = levels_contrast * lerp(raw_diffusion_color, raw_halation_color, halation_weight); - const float3 final_bloom = lerp(phosphor_bloom, diffusion_color, diffusion_weight); - - // Encode and output the bloomed image: - color = encode_output(float4(final_bloom, 1.0), get_intermediate_gamma()); -} - - -void bloomVerticalVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float bloom_sigma_runtime : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - - bloom_sigma_runtime = get_min_sigma_to_blur_triad(calc_triad_size().x, bloom_diff_thresh_); -} - -void bloomVerticalPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float bloom_sigma_runtime : TEXCOORD1, - - out float4 color : SV_Target -) { - const float2 bloom_dxdy = float2(0, rcp(TEX_BLOOMVERTICAL_HEIGHT)); - - // Blur the brightpass horizontally with a 9/17/25/43x blur: - const float bloom_sigma = get_final_bloom_sigma(bloom_sigma_runtime); - const float3 color3 = tex2DblurNfast(samplerBrightpass, texcoord, - bloom_dxdy, bloom_sigma, get_intermediate_gamma()); - - // Encode and output the blurred image: - color = encode_output(float4(color3, 1.0), get_intermediate_gamma()); -} - -#endif // _BLOOM_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/blurring.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/blurring.fxh deleted file mode 100644 index 0695eace8..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/blurring.fxh +++ /dev/null @@ -1,131 +0,0 @@ -#ifndef _BLURRING_H -#define _BLURRING_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT 
shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -#include "../lib/gamma-management.fxh" -#include "../lib/blur-functions.fxh" - -#include "shared-objects.fxh" - - -void blurHorizontalVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float2 blur_dxdy : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - - // Get the uv sample distance between output pixels. Blurs are not generic - // Gaussian resizers, and correct blurs require: - // 1.) OutputSize == InputSize * 2^m, where m is an integer <= 0. - // 2.) mipmap_inputN = "true" for this pass in the preset if m != 0 - // 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs - // Gaussian resizers would upsize using the distance between input texels - // (not output pixels), but we avoid this and consistently blur at the - // destination size. Otherwise, combining statically calculated weights - // with bilinear sample exploitation would result in terrible artifacts. 
- static const float2 output_size = TEX_BLURHORIZONTAL_SIZE; - static const float2 dxdy = 1.0 / output_size; - // This blur is vertical-only, so zero out the horizontal offset: - blur_dxdy = float2(dxdy.x, 0.0); -} - -void blurHorizontalPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float2 blur_dxdy : TEXCOORD1, - - out float4 color : SV_Target -) { - static const float3 blur_color = tex2Dblur9fast(samplerBlurVertical, texcoord, blur_dxdy, get_intermediate_gamma()); - // Encode and output the blurred image: - // color = encode_output(float4(blur_color, 1.0), 1.0); - color = encode_output(float4(blur_color, 1.0), get_intermediate_gamma()); -} - - -void blurVerticalVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float2 blur_dxdy : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - - // Get the uv sample distance between output pixels. Blurs are not generic - // Gaussian resizers, and correct blurs require: - // 1.) OutputSize == InputSize * 2^m, where m is an integer <= 0. - // 2.) mipmap_inputN = "true" for this pass in the preset if m != 0 - // 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs - // Gaussian resizers would upsize using the distance between input texels - // (not output pixels), but we avoid this and consistently blur at the - // destination size. Otherwise, combining statically calculated weights - // with bilinear sample exploitation would result in terrible artifacts. 
- static const float2 output_size = TEX_BLURVERTICAL_SIZE; - static const float2 dxdy = 1.0 / output_size; - // This blur is vertical-only, so zero out the horizontal offset: - blur_dxdy = float2(0.0, dxdy.y); -} - -void blurVerticalPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float2 blur_dxdy : TEXCOORD1, - - out float4 color : SV_Target -) { - static const float3 blur_color = tex2Dblur9fast(samplerBloomApproxHoriz, texcoord, blur_dxdy, get_intermediate_gamma()); - // Encode and output the blurred image: - // color = encode_output(float4(blur_color, 1.0), 1.0); - color = encode_output(float4(blur_color, 1.0), get_intermediate_gamma()); -} - -#endif // _BLURRING_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/brightpass.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/brightpass.fxh deleted file mode 100644 index fd5ad8fb8..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/brightpass.fxh +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _BRIGHTPASS_H -#define _BRIGHTPASS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -#include "../lib/user-settings.fxh" -#include "../lib/derived-settings-and-constants.fxh" -#include "../lib/bind-shader-params.fxh" -#include "../lib/gamma-management.fxh" -#include "../lib/phosphor-mask-calculations.fxh" -#include "../lib/scanline-functions.fxh" -#include "../lib/bloom-functions.fxh" -#include "../lib/blur-functions.fxh" - - -void brightpassVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float bloom_sigma_runtime : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - - bloom_sigma_runtime = get_min_sigma_to_blur_triad(calc_triad_size().x, bloom_diff_thresh_); -} - -void brightpassPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float bloom_sigma_runtime : TEXCOORD1, - - out float4 color : SV_Target -) { - // Sample the masked scanlines: - const float3 intensity_dim = tex2D_linearize(samplerMaskedScanlines, texcoord, get_intermediate_gamma()).rgb; - // Get the full intensity, including auto-undimming, and mask compensation: - const float mask_amplify = get_mask_amplify(); - const float3 intensity = intensity_dim * rcp(levels_autodim_temp) * mask_amplify * levels_contrast; - - // Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines - // would look like, so we can estimate how much energy we'll receive from - // blooming neighbors: - const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(samplerBloomApproxHoriz, texcoord, get_intermediate_gamma()).rgb; - - // Compute the blur weight for the center texel and the maximum energy we - // expect to receive from neighbors: - const float bloom_sigma = get_final_bloom_sigma(bloom_sigma_runtime); - const float center_weight = get_center_weight(bloom_sigma); - const 
float3 max_area_contribution_approx = - max(float3(0.0, 0.0, 0.0), phosphor_blur_approx - center_weight * intensity); - // Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0), - // because it actually gets better results (on top of being very simple), - // but adjust all intensities for the user's desired underestimate factor: - const float3 area_contrib_underestimate = bloom_underestimate_levels * max_area_contribution_approx; - const float3 intensity_underestimate = bloom_underestimate_levels * intensity; - // Calculate the blur_ratio, the ratio of intensity we want to blur: - const float3 blur_ratio_temp = - ((float3(1.0, 1.0, 1.0) - area_contrib_underestimate) / - intensity_underestimate - float3(1.0, 1.0, 1.0)) / (center_weight - 1.0); - const float3 blur_ratio = saturate(blur_ratio_temp); - // Calculate the brightpass based on the auto-dimmed, unamplified, masked - // scanlines, encode if necessary, and return! - const float3 brightpass = intensity_dim * - lerp(blur_ratio, float3(1.0, 1.0, 1.0), bloom_excess); - - color = encode_output(float4(brightpass, 1.0), get_intermediate_gamma()); -} - -#endif // _BRIGHTPASS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/content-box.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/content-box.fxh deleted file mode 100644 index ccde83258..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/content-box.fxh +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _CONTENT_BOX_H -#define _CONTENT_BOX_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -#include "shared-objects.fxh" - - -void contentCropVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 -) { - #if _DX9_ACTIVE - texcoord.x = (id == 1 || id == 3) ? content_right : content_left; - texcoord.y = (id > 1) ? content_lower : content_upper; - - position.x = (id == 1 || id == 3) ? 1 : -1; - position.y = (id > 1) ? -1 : 1; - position.zw = 1; - #else - texcoord.x = (id & 1) ? content_right : content_left; - texcoord.y = (id & 2) ? content_lower : content_upper; - - position.x = (id & 1) ? 1 : -1; - position.y = (id & 2) ? -1 : 1; - position.zw = 1; - #endif -} - -#if USE_VERTEX_UNCROPPING -/* - * Using the vertex shader for uncropping can save about 0.1ms in some apps. - * However, some apps like SNES9X w/ DX9 don't trigger a refresh of the entire screen, - * which in turn causes the ReShade UI to "stick around" after it's closed. - * - * The slower algorithm forces the entire screen to refresh, which forces the - * area outside the content box to be black. I assume most users will prefer - * the results of the slower algorithm and won't notice the 0.1ms. Users who - * need that 0.1ms can use a preprocessor def to recover that time. 
- */ - void contentUncropVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 - ) { - #if _DX9_ACTIVE - texcoord.x = id == 1 || id == 3; - texcoord.y = id < 2; - - position.x = (id == 1 || id == 3) ? content_scale.x : -content_scale.x; - position.y = (id > 1) ? content_scale.y : -content_scale.y; - position.zw = 1; - #else - texcoord.x = id & 1; - texcoord.y = !(id & 2); - - position.x = (id & 1) ? content_scale.x : -content_scale.x; - position.y = (id & 2) ? content_scale.y : -content_scale.y; - position.zw = 1; - #endif - } - - void uncropContentPixelShader( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target - ) { - color = tex2D(samplerGeometry, texcoord); - } -#else - void contentUncropVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 - ) { - // TODO: There's probably a better way to code this. - // I'll figure it out later. - #if _DX9_ACTIVE - texcoord.x = id == 1 || id == 3; - texcoord.y = id < 2; - - position.x = (id == 1 || id == 3) ? 1 : -1; - position.y = (id > 1) ? 1 : -1; - position.zw = 1; - #else - texcoord.x = id & 1; - texcoord.y = !(id & 2); - - position.x = (id & 1) ? 1 : -1; - position.y = (id & 2) ? 
1 : -1; - position.zw = 1; - #endif - } - - void uncropContentPixelShader( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target - ) { - const bool is_in_boundary = float( - texcoord.x >= content_left && texcoord.x <= content_right && - texcoord.y >= content_upper && texcoord.y <= content_lower - ); - const float2 texcoord_uncropped = ((texcoord - content_offset) * buffer_size + 0) / content_size; - - const float4 raw_color = tex2D(samplerGeometry, texcoord_uncropped); - color = float4(is_in_boundary * raw_color.rgb, raw_color.a); - } -#endif - - -#if CONTENT_BOX_VISIBLE - #ifndef CONTENT_BOX_INSCRIBED - #define CONTENT_BOX_INSCRIBED 1 - #endif - - #ifndef CONTENT_BOX_THICKNESS - #define CONTENT_BOX_THICKNESS 5 - #endif - - #ifndef CONTENT_BOX_COLOR_R - #define CONTENT_BOX_COLOR_R 1.0 - #endif - - #ifndef CONTENT_BOX_COLOR_G - #define CONTENT_BOX_COLOR_G 0.0 - #endif - - #ifndef CONTENT_BOX_COLOR_B - #define CONTENT_BOX_COLOR_B 0.0 - #endif - - static const float vert_line_thickness = float(CONTENT_BOX_THICKNESS) / BUFFER_WIDTH; - static const float horiz_line_thickness = float(CONTENT_BOX_THICKNESS) / BUFFER_HEIGHT; - - #if CONTENT_BOX_INSCRIBED - // Set the outer borders to the edge of the content - static const float left_line_1 = content_left; - static const float left_line_2 = left_line_1 + vert_line_thickness; - static const float right_line_2 = content_right; - static const float right_line_1 = right_line_2 - vert_line_thickness; - - static const float upper_line_1 = content_upper; - static const float upper_line_2 = upper_line_1 + horiz_line_thickness; - static const float lower_line_2 = content_lower; - static const float lower_line_1 = lower_line_2 - horiz_line_thickness; - #else - // Set the inner borders to the edge of the content - static const float left_line_2 = content_left; - static const float left_line_1 = left_line_2 - vert_line_thickness; - static const float right_line_1 = content_right; - static 
const float right_line_2 = right_line_1 + vert_line_thickness; - - static const float upper_line_2 = content_upper; - static const float upper_line_1 = upper_line_2 - horiz_line_thickness; - static const float lower_line_1 = content_lower; - static const float lower_line_2 = lower_line_1 + horiz_line_thickness; - #endif - - - static const float4 box_color = float4( - CONTENT_BOX_COLOR_R, - CONTENT_BOX_COLOR_G, - CONTENT_BOX_COLOR_B, - 1.0 - ); - - void contentBoxPixelShader( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target - ) { - - const bool is_inside_outerbound = ( - texcoord.x >= left_line_1 && texcoord.x <= right_line_2 && - texcoord.y >= upper_line_1 && texcoord.y <= lower_line_2 - ); - const bool is_outside_innerbound = ( - texcoord.x <= left_line_2 || texcoord.x >= right_line_1 || - texcoord.y <= upper_line_2 || texcoord.y >= lower_line_1 - ); - - if (is_inside_outerbound && is_outside_innerbound) { - color = box_color; - } - else { - color = tex2D(ReShade::BackBuffer, texcoord); - } - } - - -#endif // CONTENT_BOX_VISIBLE -#endif // _CONTENT_BOX_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/deinterlace.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/deinterlace.fxh deleted file mode 100644 index 15878c5df..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/deinterlace.fxh +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef _DEINTERLACE_H -#define _DEINTERLACE_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, 
and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -#include "../lib/user-settings.fxh" -#include "../lib/derived-settings-and-constants.fxh" -#include "../lib/bind-shader-params.fxh" -#include "../lib/gamma-management.fxh" -#include "../lib/scanline-functions.fxh" - - - -void freezeFrameVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 -) { - float use_deinterlacing_tex = enable_interlacing && ( - scanline_deinterlacing_mode == 2 || scanline_deinterlacing_mode == 3 - ); - - texcoord.x = (id == 2) ? use_deinterlacing_tex*2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1); -} - -void freezeFramePS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - color = tex2D(samplerBeamConvergence, texcoord); -} - - -void deinterlaceVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float2 v_step : TEXCOORD1 -) { - freezeFrameVS(id, position, texcoord); - - v_step = float2(0.0, scanline_thickness * rcp(TEX_FREEZEFRAME_HEIGHT)); -} - - -void deinterlacePS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float2 v_step : TEXCOORD1, - - out float4 color : SV_Target -) { - // float2 scanline_offset_norm; - // float triangle_wave_freq; - // bool field_parity; - // bool wrong_field; - // calc_wrong_field(texcoord, scanline_offset_norm, triangle_wave_freq, field_parity, wrong_field); - - float2 rotated_coord = lerp(texcoord.yx, texcoord, geom_rotation_mode == 0 || geom_rotation_mode == 2); - float scale = lerp(CONTENT_WIDTH, CONTENT_HEIGHT, geom_rotation_mode == 0 || geom_rotation_mode == 2); - - InterpolationFieldData interpolation_data = calc_interpolation_field_data(rotated_coord, scale); - - // TODO: add scanline_parity to calc_wrong_field() - - // Weaving - // Sample texcoord from this frame and the previous frame - // If we're in the correct field, use the current sample - // If we're in the wrong field, average the current and prev samples - // In this case, we're probably averaging a color with 0 and producing a brightness of 0.5. 
- [branch] - if (enable_interlacing && scanline_deinterlacing_mode == 2) { - // const float cur_scanline_idx = get_curr_scanline_idx(texcoord.y, content_size.y); - // const float wrong_field = curr_line_is_wrong_field(cur_scanline_idx); - - const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord); - const float4 cur_line_prev_color = tex2D_nograd(samplerFreezeFrame, texcoord); - - const float4 avg_color = (cur_line_color + cur_line_prev_color) / 2.0; - - // Multiply by 1.5, so each pair of scanlines has total brightness 2 - const float4 raw_out_color = lerp(1.5*cur_line_color, avg_color, interpolation_data.wrong_field); - color = encode_output(raw_out_color, deinterlacing_blend_gamma); - } - // Blended Weaving - // Sample texcoord from this frame - // From the previous frame, sample the current scanline's sibling - // Do this by shifting up or down by a line - // If we're in the correct field, use the current sample - // If we're in the wrong field, average the current and prev samples - // In this case, we're averaging two fully illuminated colors - else if (enable_interlacing && scanline_deinterlacing_mode == 3) { - const float2 raw_offset = lerp(1, -1, interpolation_data.scanline_parity) * v_step; - const float2 curr_offset = lerp(0, raw_offset, interpolation_data.wrong_field); - const float2 prev_offset = lerp(raw_offset, 0, interpolation_data.wrong_field); - - const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord + curr_offset); - const float4 prev_line_color = tex2D_nograd(samplerFreezeFrame, texcoord + prev_offset); - - const float4 avg_color = (cur_line_color + prev_line_color) / 2.0; - const float4 raw_out_color = lerp(cur_line_color, avg_color, interpolation_data.wrong_field); - color = encode_output(raw_out_color, deinterlacing_blend_gamma); - } - // No temporal blending - else { - color = tex2D_nograd(samplerBeamConvergence, texcoord); - } -} - -#endif // _DEINTERLACE_H \ No newline at end of file diff --git 
a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/electron-beams.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/electron-beams.fxh deleted file mode 100644 index a0bec77c0..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/electron-beams.fxh +++ /dev/null @@ -1,347 +0,0 @@ -#ifndef _ELECTRON_BEAMS_H -#define _ELECTRON_BEAMS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -#include "../lib/bind-shader-params.fxh" -#include "../lib/gamma-management.fxh" -#include "../lib/scanline-functions.fxh" - -#include "content-box.fxh" -#include "shared-objects.fxh" - - -void calculateBeamDistsVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0 -) { - const float compute_mask_factor = frame_count % 60 == 0 || overlay_active > 0; - - texcoord.x = (id == 2) ? compute_mask_factor*2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1); -} - - -void calculateBeamDistsPS( - in float4 position : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 beam_strength : SV_Target -) { - InterpolationFieldData interpolation_data = precalc_interpolation_field_data(texcoord); - - // We have to subtract off the texcoord offset to make sure we're using domain [0, 1] - const float color_corrected = texcoord.x - 1.0 / TEX_BEAMDIST_WIDTH; - - // Digital shape - // Beam will be perfectly rectangular - [branch] - if (beam_shape_mode == 0) { - // Double the intensity when interlacing to maintain the same apparent brightness - const float interlacing_brightness_factor = 1 + float( - enable_interlacing && - (scanline_deinterlacing_mode != 2) && - (scanline_deinterlacing_mode != 3) - ); - const float raw_beam_strength = (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp; - - beam_strength = float4(color_corrected * raw_beam_strength, 0, 0, 1); - } - // Linear shape - // Beam intensity will drop off linarly with distance from center - // Works better than gaussian with narrow scanlines (about 1-6 pixels wide) - // Will only consider contribution from nearest scanline - else if (beam_shape_mode == 1) { - const float beam_dist_y = triangle_wave(texcoord.y, interpolation_data.triangle_wave_freq); - - const bool scanline_is_wider_than_1 = scanline_thickness > 1; - const bool deinterlacing_mode_requires_boost = ( - enable_interlacing && - (scanline_deinterlacing_mode != 2) && - (scanline_deinterlacing_mode != 3) - ); - - const float interlacing_brightness_factor = (1 + scanline_is_wider_than_1) * (1 + deinterlacing_mode_requires_boost); - // const float raw_beam_strength = (1 - beam_dist_y) * (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp; - // const float raw_beam_strength = (1 - beam_dist_y); - const 
float raw_beam_strength = saturate(-beam_dist_y * rcp(linear_beam_thickness) + 1); - const float adj_beam_strength = raw_beam_strength * (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp; - - beam_strength = float4(color_corrected * adj_beam_strength, 0, 0, 1); - } - // Gaussian Shape - // Beam will be a distorted Gaussian, dependent on color brightness and hyperparameters - // Will only consider contribution from nearest scanline - else if (beam_shape_mode == 2) { - // Calculate {sigma, shape}_range outside of scanline_contrib so it's only - // done once per pixel (not 6 times) with runtime params. Don't reuse the - // vertex shader calculations, so static versions can be constant-folded. - const float sigma_range = max(gaussian_beam_max_sigma, gaussian_beam_min_sigma) - gaussian_beam_min_sigma; - const float shape_range = max(gaussian_beam_max_shape, gaussian_beam_min_shape) - gaussian_beam_min_shape; - - const float beam_dist_factor = 1 + float(enable_interlacing); - const float freq_adj = interpolation_data.triangle_wave_freq * rcp(beam_dist_factor); - // The conditional 0.25*f offset ensures the interlaced scanlines align with the non-interlaced ones as in the other beam shapes - const float frame_offset = enable_interlacing * (!interpolation_data.field_parity * 0.5 + 0.25) * rcp(freq_adj); - const float beam_dist_y = triangle_wave((texcoord.y - frame_offset), freq_adj) * rcp(linear_beam_thickness); - - const float interlacing_brightness_factor = 1 + float( - !enable_interlacing && - (scanline_thickness > 1) - ) + float( - enable_interlacing && - (scanline_deinterlacing_mode != 2) && - (scanline_deinterlacing_mode != 3) - ); - const float raw_beam_strength = get_gaussian_beam_strength( - beam_dist_y, color_corrected, - sigma_range, shape_range - ) * interlacing_brightness_factor * levels_autodim_temp; - - beam_strength = float4(raw_beam_strength, 0, 0, 1); - } - // Gaussian Shape - // Beam will 
be a distorted Gaussian, dependent on color brightness and hyperparameters - // Will consider contributions from current scanline and two neighboring in-field scanlines - else { - // Calculate {sigma, shape}_range outside of scanline_contrib so it's only - // done once per pixel (not 6 times) with runtime params. Don't reuse the - // vertex shader calculations, so static versions can be constant-folded. - const float sigma_range = max(gaussian_beam_max_sigma, gaussian_beam_min_sigma) - gaussian_beam_min_sigma; - const float shape_range = max(gaussian_beam_max_shape, gaussian_beam_min_shape) - gaussian_beam_min_shape; - - const float beam_dist_factor = (1 + float(enable_interlacing)); - const float freq_adj = interpolation_data.triangle_wave_freq * rcp(beam_dist_factor); - // The conditional 0.25*f offset ensures the interlaced scanlines align with the non-interlaced ones as in the other beam shapes - const float frame_offset = enable_interlacing * (!interpolation_data.field_parity * 0.5 + 0.25) * rcp(freq_adj); - const float curr_beam_dist_y = triangle_wave(texcoord.y - frame_offset, freq_adj) * rcp(linear_beam_thickness); - const float upper_beam_dist_y = (sawtooth_incr_wave(texcoord.y - frame_offset, freq_adj)*2 + 1) * rcp(linear_beam_thickness); - const float lower_beam_dist_y = 4 * rcp(linear_beam_thickness) - upper_beam_dist_y; - - const float upper_beam_strength = get_gaussian_beam_strength( - upper_beam_dist_y, color_corrected, - sigma_range, shape_range - ); - const float curr_beam_strength = get_gaussian_beam_strength( - curr_beam_dist_y, color_corrected, - sigma_range, shape_range - ); - const float lower_beam_strength = get_gaussian_beam_strength( - lower_beam_dist_y, color_corrected, - sigma_range, shape_range - ); - - const float interlacing_brightness_factor = 1 + float( - !enable_interlacing && - (scanline_thickness > 1) - ) + float( - enable_interlacing && - (scanline_deinterlacing_mode != 2) && - (scanline_deinterlacing_mode != 3) - ); - const 
float3 raw_beam_strength = float3(curr_beam_strength, upper_beam_strength, lower_beam_strength) * interlacing_brightness_factor * levels_autodim_temp; - - beam_strength = float4(raw_beam_strength, 1); - } -} - - -void simulateEletronBeamsVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float4 runtime_bin_shapes : TEXCOORD1 -) { - #if ENABLE_PREBLUR - PostProcessVS(id, position, texcoord); - #else - // texcoord.x = (id == 0 || id == 2) ? content_left : content_right; - // texcoord.y = (id < 2) ? content_lower : content_upper; - // position.x = (id == 0 || id == 2) ? -1 : 1; - // position.y = (id < 2) ? -1 : 1; - // position.zw = 1; - contentCropVS(id, position, texcoord); - #endif - - bool screen_is_landscape = geom_rotation_mode == 0 || geom_rotation_mode == 2; - - // Mode 0: size of pixel in [0, 1] = pixel_dims / viewport_size - // Mode 1: size of pixel in [0, 1] = viewport_size / grid_dims - // float2 runtime_pixel_size = (pixel_grid_mode == 0) ? 
pixel_size * rcp(content_size) : rcp(pixel_grid_resolution); - float2 runtime_pixel_size = rcp(content_size); - float2 runtime_scanline_shape = lerp( - float2(scanline_thickness, 1), - float2(1, scanline_thickness), - screen_is_landscape - ) * rcp(content_size); - - runtime_bin_shapes = float4(runtime_pixel_size, runtime_scanline_shape); -} - -void simulateEletronBeamsPS( - in float4 position : SV_Position, - in float2 texcoord : TEXCOORD0, - in float4 runtime_bin_shapes : TEXCOORD1, - - out float4 color : SV_Target -) { - bool screen_is_landscape = geom_rotation_mode == 0 || geom_rotation_mode == 2; - float2 rotated_coord = lerp(texcoord.yx, texcoord, screen_is_landscape); - float scale = lerp(CONTENT_WIDTH, CONTENT_HEIGHT, screen_is_landscape); - - // InterpolationFieldData interpolation_data = precalc_interpolation_field_data(rotated_coord); - - // // We have to subtract off the texcoord offset to make sure we're using domain [0, 1] - // const float color_corrected = rotated_coord.x - 1.0 / scale; - - - InterpolationFieldData interpolation_data = calc_interpolation_field_data(rotated_coord, scale); - const float ypos = (rotated_coord.y * interpolation_data.triangle_wave_freq + interpolation_data.field_parity) * 0.5; - - float2 texcoord_scanlined = round_coord(texcoord, 0, runtime_bin_shapes.zw); - - // Sample from the neighboring scanline when in the wrong field - [branch] - if (interpolation_data.wrong_field && screen_is_landscape) { - const float coord_moved_up = texcoord_scanlined.y <= texcoord.y; - const float direction = lerp(-1, 1, coord_moved_up); - texcoord_scanlined.y += direction * scanline_thickness * rcp(content_size.y); - } - else if (interpolation_data.wrong_field) { - const float coord_moved_up = texcoord_scanlined.x <= texcoord.x; - const float direction = lerp(-1, 1, coord_moved_up); - texcoord_scanlined.x += direction * scanline_thickness * rcp(content_size.x); - } - - // Now we apply pixellation and cropping - // float2 texcoord_pixellated = 
round_coord( - // texcoord_scanlined, - // pixel_grid_offset * rcp(content_size), - // runtime_bin_shapes.xy - // ); - float2 texcoord_pixellated = texcoord_scanlined; - - const float2 texcoord_uncropped = texcoord_pixellated; - #if ENABLE_PREBLUR - // If the pre-blur pass ran, then it's already handled cropping. - // const float2 texcoord_uncropped = texcoord_pixellated; - #define source_sampler samplerPreblurHoriz - #else - // const float2 texcoord_uncropped = texcoord_pixellated * content_scale + content_offset; - #define source_sampler ReShade::BackBuffer - #endif - - [branch] - if (beam_shape_mode < 3) { - const float4 scanline_color = tex2Dlod_linearize( - source_sampler, - texcoord_uncropped, - get_input_gamma() - ); - - const float beam_strength_r = tex2D_nograd(samplerBeamDist, float2(scanline_color.r, ypos)).x; - const float beam_strength_g = tex2D_nograd(samplerBeamDist, float2(scanline_color.g, ypos)).x; - const float beam_strength_b = tex2D_nograd(samplerBeamDist, float2(scanline_color.b, ypos)).x; - const float4 beam_strength = float4(beam_strength_r, beam_strength_g, beam_strength_b, 1); - - color = beam_strength; - } - else { - const float2 offset = float2(0, scanline_thickness) * (1 + enable_interlacing) * rcp(content_size); - - const float4 curr_scanline_color = tex2Dlod_linearize( - source_sampler, - texcoord_uncropped, - get_input_gamma() - ); - const float4 upper_scanline_color = tex2Dlod_linearize( - source_sampler, - texcoord_uncropped - offset, - get_input_gamma() - ); - const float4 lower_scanline_color = tex2Dlod_linearize( - source_sampler, - texcoord_uncropped + offset, - get_input_gamma() - ); - - const float curr_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x; - const float curr_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x; - const float curr_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x; - - const float 
upper_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y; - const float upper_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y; - const float upper_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y; - - const float lower_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z; - const float lower_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z; - const float lower_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z; - - color = float4( - curr_beam_strength_r + upper_beam_strength_r + lower_beam_strength_r, - curr_beam_strength_g + upper_beam_strength_g + lower_beam_strength_g, - curr_beam_strength_b + upper_beam_strength_b + lower_beam_strength_b, - 1 - ); - } -} - -void beamConvergenceVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float run_convergence : TEXCOORD1 -) { - PostProcessVS(id, position, texcoord); - const uint3 x_flag = convergence_offset_x != 0; - const uint3 y_flag = convergence_offset_y != 0; - run_convergence = dot(x_flag, 1) + dot(y_flag, 1); -} - -void beamConvergencePS( - in float4 position : SV_Position, - in float2 texcoord : TEXCOORD0, - in float run_convergence : TEXCOORD1, - - out float4 color : SV_TARGET -) { - // [branch] - if (!run_convergence) { - color = tex2D(samplerElectronBeams, texcoord - float2(0, scanline_offset * rcp(content_size.y))); - } - else { - const float3 offset_sample = sample_rgb_scanline( - samplerElectronBeams, texcoord - float2(0, scanline_offset * rcp(content_size.y)), - TEX_ELECTRONBEAMS_SIZE, rcp(TEX_ELECTRONBEAMS_SIZE) - ); - - color = float4(offset_sample, 1); - } -} - -#endif // _ELECTRON_BEAMS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh 
b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh deleted file mode 100644 index bd6a7dca4..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/geometry-aa-last-pass.fxh +++ /dev/null @@ -1,220 +0,0 @@ -#ifndef _GEOMETRY_AA_LAST_PASS_H -#define _GEOMETRY_AA_LAST_PASS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -#include "../lib/user-settings.fxh" -#include "../lib/derived-settings-and-constants.fxh" -#include "../lib/bind-shader-params.fxh" -#include "../lib/gamma-management.fxh" -#include "../lib/tex2Dantialias.fxh" -#include "../lib/geometry-functions.fxh" - -// Disabled in the ReShade port because I don't know a good way to make these -// static AND global AND defined with sin(), cos(), or pow(). - -// #if !_RUNTIME_GEOMETRY_TILT -// // Create a local-to-global rotation matrix for the CRT's coordinate frame -// // and its global-to-local inverse. See the vertex shader for details. -// // It's faster to compute these statically if possible. 
-// static const float2 sin_tilt = sin(geom_tilt_angle_static); -// static const float2 cos_tilt = cos(geom_tilt_angle_static); -// static const float3x3 geom_local_to_global_static = float3x3( -// cos_tilt.x, sin_tilt.y*sin_tilt.x, cos_tilt.y*sin_tilt.x, -// 0.0, cos_tilt.y, -sin_tilt.y, -// -sin_tilt.x, sin_tilt.y*cos_tilt.x, cos_tilt.y*cos_tilt.x); -// static const float3x3 geom_global_to_local_static = float3x3( -// cos_tilt.x, 0.0, -sin_tilt.x, -// sin_tilt.y*sin_tilt.x, cos_tilt.y, sin_tilt.y*cos_tilt.x, -// cos_tilt.y*sin_tilt.x, -sin_tilt.y, cos_tilt.y*cos_tilt.x); -// #endif - -float2x2 mul_scale(float2 scale, float2x2 mtrx) -{ - float4 temp_matrix = float4(mtrx[0][0], mtrx[0][1], mtrx[1][0], mtrx[1][1]) * scale.xxyy; - return float2x2(temp_matrix.x, temp_matrix.y, temp_matrix.z, temp_matrix.w); -} - - -void geometryVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float2 output_size_inv : TEXCOORD1, - out float4 geom_aspect_and_overscan : TEXCOORD2, - out float3 eye_pos_local : TEXCOORD3, - out float3 global_to_local_row0 : TEXCOORD4, - out float3 global_to_local_row1 : TEXCOORD5, - out float3 global_to_local_row2 : TEXCOORD6 -) { - PostProcessVS(id, position, texcoord); - - output_size_inv = 1.0 / content_size; - - // Get aspect/overscan vectors from scalar parameters (likely uniforms): - const float viewport_aspect_ratio = output_size_inv.y / output_size_inv.x; - const float2 geom_aspect = get_aspect_vector(viewport_aspect_ratio); - const float2 geom_overscan = get_geom_overscan_vector(); - geom_aspect_and_overscan = float4(geom_aspect, geom_overscan); - - #if _RUNTIME_GEOMETRY_TILT - // Create a local-to-global rotation matrix for the CRT's coordinate - // frame and its global-to-local inverse. Rotate around the x axis - // first (pitch) and then the y axis (yaw) with yucky Euler angles. - // Positive angles go clockwise around the right-vec and up-vec. 
- // Runtime shader parameters prevent us from computing these globally, - // but we can still combine the pitch/yaw matrices by hand to cut a - // few instructions. Note that cg matrices fill row1 first, then row2, - // etc. (row-major order). - const float2 geom_tilt_angle = get_geom_tilt_angle_vector(); - const float2 sin_tilt = sin(geom_tilt_angle); - const float2 cos_tilt = cos(geom_tilt_angle); - // Conceptual breakdown: - static const float3x3 rot_x_matrix = float3x3( - 1.0, 0.0, 0.0, - 0.0, cos_tilt.y, -sin_tilt.y, - 0.0, sin_tilt.y, cos_tilt.y); - static const float3x3 rot_y_matrix = float3x3( - cos_tilt.x, 0.0, sin_tilt.x, - 0.0, 1.0, 0.0, - -sin_tilt.x, 0.0, cos_tilt.x); - static const float3x3 local_to_global = - mul(rot_y_matrix, rot_x_matrix); -/* static const float3x3 global_to_local = - transpose(local_to_global); - const float3x3 local_to_global = float3x3( - cos_tilt.x, sin_tilt.y*sin_tilt.x, cos_tilt.y*sin_tilt.x, - 0.0, cos_tilt.y, sin_tilt.y, - sin_tilt.x, sin_tilt.y*cos_tilt.x, cos_tilt.y*cos_tilt.x); -*/ // This is a pure rotation, so transpose = inverse: - const float3x3 global_to_local = transpose(local_to_global); - // Decompose the matrix into 3 float3's for output: - global_to_local_row0 = float3(global_to_local[0][0], global_to_local[0][1], global_to_local[0][2]);//._m00_m01_m02); - global_to_local_row1 = float3(global_to_local[1][0], global_to_local[1][1], global_to_local[1][2]);//._m10_m11_m12); - global_to_local_row2 = float3(global_to_local[2][0], global_to_local[2][1], global_to_local[2][2]);//._m20_m21_m22); - #else - static const float3x3 global_to_local = geom_global_to_local_static; - static const float3x3 local_to_global = geom_local_to_global_static; - #endif - - // Get an optimal eye position based on geom_view_dist, viewport_aspect, - // and CRT radius/rotation: - #if _RUNTIME_GEOMETRY_MODE - const float geom_mode = geom_mode_runtime; - #else - static const float geom_mode = geom_mode_static; - #endif - const float3 
eye_pos_global = get_ideal_global_eye_pos(local_to_global, geom_aspect, geom_mode); - eye_pos_local = mul(global_to_local, eye_pos_global); -} - -void geometryPS( - in float4 position : SV_Position, - in float2 texcoord : TEXCOORD0, - in float2 output_size_inv : TEXCOORD1, - in float4 geom_aspect_and_overscan : TEXCOORD2, - in float3 eye_pos_local : TEXCOORD3, - in float3 global_to_local_row0 : TEXCOORD4, - in float3 global_to_local_row1 : TEXCOORD5, - in float3 global_to_local_row2 : TEXCOORD6, - - out float4 color : SV_Target -) { - // Localize some parameters: - const float2 geom_aspect = geom_aspect_and_overscan.xy; - const float2 geom_overscan = geom_aspect_and_overscan.zw; - #if _RUNTIME_GEOMETRY_TILT - const float3x3 global_to_local = float3x3(global_to_local_row0, - global_to_local_row1, global_to_local_row2); - #else - static const float3x3 global_to_local = geom_global_to_local_static; - #endif - #if _RUNTIME_GEOMETRY_MODE - const float geom_mode = geom_mode_runtime; - #else - static const float geom_mode = geom_mode_static; - #endif - - // Get flat and curved texture coords for the current fragment point sample - // and a pixel_to_tangent_video_uv matrix for transforming pixel offsets: - // video_uv = relative position in video frame, mapped to [0.0, 1.0] range - // tex_uv = relative position in padded texture, mapped to [0.0, 1.0] range - const float2 flat_video_uv = texcoord; - float2x2 pixel_to_video_uv; - float2 video_uv_no_geom_overscan; - if(geom_mode > 0.5) - { - video_uv_no_geom_overscan = - get_curved_video_uv_coords_and_tangent_matrix(flat_video_uv, - eye_pos_local, output_size_inv, geom_aspect, - geom_mode, global_to_local, pixel_to_video_uv); - } - else - { - video_uv_no_geom_overscan = flat_video_uv; - pixel_to_video_uv = float2x2( - output_size_inv.x, 0.0, 0.0, output_size_inv.y); - } - // Correct for overscan here (not in curvature code): - const float2 video_uv = - (video_uv_no_geom_overscan - float2(0.5, 0.5))/geom_overscan + float2(0.5, 
0.5); - const float2 tex_uv = video_uv; - - // Get a matrix transforming pixel vectors to tex_uv vectors: - const float2x2 pixel_to_tex_uv = - mul_scale(1.0 / geom_overscan, pixel_to_video_uv); - - // Sample! Skip antialiasing if antialias_level < 0.5 or both of these hold: - // 1.) Geometry/curvature isn't used - // 2.) Overscan == float2(1.0, 1.0) - // Skipping AA is sharper, but it's only faster with dynamic branches. - const float2 abs_aa_r_offset = abs(get_aa_subpixel_r_offset()); - // this next check seems to always return true, even when it shouldn't so disabling it for now - const bool need_subpixel_aa = false;//abs_aa_r_offset.x + abs_aa_r_offset.y > 0.0; - float3 raw_color; - - if(antialias_level > 0.5 && (geom_mode > 0.5 || any(bool2((geom_overscan.x != 1.0), (geom_overscan.y != 1.0))))) - { - // Sample the input with antialiasing (due to sharp phosphors, etc.): - raw_color = tex2Daa(samplerBloomHorizontal, tex_uv, pixel_to_tex_uv, float(frame_count), get_intermediate_gamma()); - } - else if(antialias_level > 0.5 && need_subpixel_aa) - { - // Sample at each subpixel location: - raw_color = tex2Daa_subpixel_weights_only( - samplerBloomHorizontal, tex_uv, pixel_to_tex_uv, get_intermediate_gamma()); - } - else - { - raw_color = tex2D_linearize(samplerBloomHorizontal, tex_uv, get_intermediate_gamma()).rgb; - } - - // Dim borders and output the final result: - const float border_dim_factor = get_border_dim_factor(video_uv, geom_aspect); - const float3 final_color = raw_color * border_dim_factor; - - color = encode_output(float4(final_color, 1.0), get_output_gamma()); -} - -#endif // _GEOMETRY_AA_LAST_PASS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/input-blurring.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/input-blurring.fxh deleted file mode 100644 index 6b4444004..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/input-blurring.fxh +++ /dev/null @@ -1,74 
+0,0 @@ -#ifndef _INPUT_BLURRING_H -#define _INPUT_BLURRING_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2022 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -// Theoretically this could go in blurring.fxh -// But that file has a bunch of GPL stuff in it. -// Keeping it separate makes it easier to communicate that this portion is -// available under the MIT license. 
- -#include "../lib/downsampling-functions.fxh" - -#include "content-box.fxh" -#include "shared-objects.fxh" - - -void preblurVertPS( - in const float4 pos : SV_Position, - in const float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - const float2 texcoord_uncropped = texcoord; - - const float2 max_delta_uv = float2(0.0, rcp(content_size.y)) * preblur_effect_radius; - const float2 delta_uv = max_delta_uv * rcp(max(preblur_sampling_radius.y, 1)); - - color = float4(opaque_linear_downsample( - ReShade::BackBuffer, - texcoord_uncropped, - preblur_sampling_radius.y, - delta_uv - ), 1); -} - -void preblurHorizPS( - in const float4 pos : SV_Position, - in const float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - const float2 max_delta_uv = float2(rcp(content_size.x), 0.0) * preblur_effect_radius; - const float2 delta_uv = max_delta_uv * rcp(max(preblur_sampling_radius.x, 1)); - - color = float4(opaque_linear_downsample( - samplerPreblurVert, - texcoord, - preblur_sampling_radius.x, - delta_uv - ), 1); -} - -#endif // _INPUT_BLURRING_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/phosphor-mask.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/phosphor-mask.fxh deleted file mode 100644 index 9a2e1649b..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/phosphor-mask.fxh +++ /dev/null @@ -1,211 +0,0 @@ -#ifndef _PHOSPHOR_MASK_H -#define _PHOSPHOR_MASK_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2022 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the 
Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -#include "../lib/bind-shader-params.fxh" -#include "../lib/phosphor-mask-calculations.fxh" - -#include "shared-objects.fxh" - - -// Split into 64 segments that overlap a little bit -static const float num_segments = 64; -static const float segment_offset = 0.015625; // 1/64 -static const float segment_width = 0.0234375; // 1/128 - -void generatePhosphorMaskVS( - in uint id : SV_VertexID, - - out float4 position : SV_Position, - out float2 texcoord : TEXCOORD0, - out float2 viewport_frequency_factor: TEXCOORD1, - out float2 mask_pq_x : TEXCOORD2, - out float2 mask_pq_y : TEXCOORD3 -) { - const float screen_segment_idx = frame_count % num_segments; - const float left_coord = lerp(segment_offset * screen_segment_idx, 0, overlay_active > 0); - const float right_coord = lerp(left_coord + segment_width, 1, overlay_active > 0); - const float pos_center = 2 * (left_coord + 0.5 * segment_width - 0.5); - const float pos_left = lerp(pos_center - segment_width, -1, overlay_active > 0); - const float pos_right = lerp(pos_center + segment_width, 1, overlay_active > 0); - - #if _DX9_ACTIVE - texcoord.x = (id == 1 || id == 3) ? right_coord : left_coord; - texcoord.y = (id > 1) ? 1 : 0; - - position.x = (id == 1 || id == 3) ? pos_right : pos_left; - position.y = (id > 1) ? 
-1 : 1; - position.zw = 1; - #else - texcoord.x = (id & 1) ? right_coord : left_coord; - texcoord.y = (id & 2) ? 1 : 0; - - position.x = (id & 1) ? pos_right : pos_left; - position.y = (id & 2) ? -1 : 1; - position.zw = 1; - #endif - - viewport_frequency_factor = calc_phosphor_viewport_frequency_factor(); - - // We don't alter these based on screen rotation because they're independent of screen dimensions. - float edge_norm_tx; - float edge_norm_ty; - [flatten] - switch (mask_type) { - case 0: - edge_norm_tx = grille_edge_norm_t; - break; - case 1: - edge_norm_tx = slot_edge_norm_tx; - edge_norm_ty = slot_edge_norm_ty; - break; - case 2: - edge_norm_tx = shadow_edge_norm_tx; - edge_norm_ty = shadow_edge_norm_ty; - break; - case 3: - edge_norm_tx = smallgrille_edge_norm_t; - break; - case 4: - edge_norm_tx = smallslot_edge_norm_tx; - edge_norm_ty = smallslot_edge_norm_ty; - break; - default: - edge_norm_tx = smallshadow_edge_norm_tx; - edge_norm_ty = smallshadow_edge_norm_ty; - break; - } - - const float2 thickness_scaled = linearize_phosphor_thickness_param(phosphor_thickness); - const float mask_p_x = exp(-calculate_phosphor_p_value(edge_norm_tx, thickness_scaled.x, phosphor_sharpness.x)); - const float mask_p_y = exp(-calculate_phosphor_p_value(edge_norm_ty, thickness_scaled.y, phosphor_sharpness.y)); - mask_pq_x = float2(mask_p_x, phosphor_sharpness.x); - mask_pq_y = float2(mask_p_y, phosphor_sharpness.y); -} - -void generatePhosphorMaskPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - in float2 viewport_frequency_factor: TEXCOORD1, - in float2 mask_pq_x : TEXCOORD2, - in float2 mask_pq_y : TEXCOORD3, - - out float4 color : SV_Target -) { - [branch] - if (geom_rotation_mode == 1 || geom_rotation_mode == 3) { - texcoord = texcoord.yx; - viewport_frequency_factor = viewport_frequency_factor.yx; - } - - float3 phosphor_color; - [branch] - if (mask_type == 0) { - phosphor_color = get_phosphor_intensity_grille( - texcoord, - 
viewport_frequency_factor, - mask_pq_x - ); - } - else if (mask_type == 1) { - phosphor_color = get_phosphor_intensity_slot( - texcoord, - viewport_frequency_factor, - mask_pq_x, - mask_pq_y - ); - } - else if (mask_type == 2) { - phosphor_color = get_phosphor_intensity_shadow( - texcoord, - viewport_frequency_factor, - float2(mask_pq_x.y, mask_pq_y.y) - ); - } - else if (mask_type == 3) { - phosphor_color = get_phosphor_intensity_grille_small( - texcoord, - viewport_frequency_factor, - mask_pq_x - ); - } - else if (mask_type == 4) { - phosphor_color = get_phosphor_intensity_slot_small( - texcoord, - viewport_frequency_factor, - mask_pq_x, - mask_pq_y - ); - } - else { - phosphor_color = get_phosphor_intensity_shadow_small( - texcoord, - viewport_frequency_factor, - mask_pq_x, - mask_pq_y - ); - } - - color = float4(phosphor_color, 1.0); -} - - -void applyComputedPhosphorMaskPS( - in float4 pos : SV_Position, - in float2 texcoord : TEXCOORD0, - - out float4 color : SV_Target -) { - bool use_deinterlacing_tex = enable_interlacing && ( - scanline_deinterlacing_mode == 2 || scanline_deinterlacing_mode == 3 - ); - - float3 scanline_color_dim; - [branch] - if (use_deinterlacing_tex) scanline_color_dim = tex2D(samplerDeinterlace, texcoord).rgb; - else scanline_color_dim = tex2D(samplerBeamConvergence, texcoord).rgb; - - const float3 phosphor_color = tex2D(samplerPhosphorMask, texcoord).rgb; - - // Sample the halation texture (auto-dim to match the scanlines), and - // account for both horizontal and vertical convergence offsets, given - // in units of texels horizontally and same-field scanlines vertically: - const float3 halation_color = tex2D_linearize(samplerBlurHorizontal, texcoord, get_intermediate_gamma()).rgb; - - // Apply halation: Halation models electrons flying around under the glass - // and hitting the wrong phosphors (of any color). It desaturates, so - // average the halation electrons to a scalar. 
Reduce the local scanline - // intensity accordingly to conserve energy. - const float halation_intensity_dim_scalar = dot(halation_color, float3(1, 1, 1)) / 3.0; - const float3 halation_intensity_dim = halation_intensity_dim_scalar; - const float3 electron_intensity_dim = lerp(scanline_color_dim, halation_intensity_dim, halation_weight); - - // Apply the phosphor mask: - const float3 phosphor_emission_dim = electron_intensity_dim * phosphor_color; - - color = float4(phosphor_emission_dim, 1.0); -} - -#endif // _PHOSPHOR_MASK_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/shared-objects.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/shaders/shared-objects.fxh deleted file mode 100644 index 0ef24ff54..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/shaders/shared-objects.fxh +++ /dev/null @@ -1,370 +0,0 @@ -#ifndef _SHARED_OBJECTS_H -#define _SHARED_OBJECTS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade. -// Copyright (C) 2020 Alex Gunter -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -#include "../lib/helper-functions-and-macros.fxh" -#include "../lib/derived-settings-and-constants.fxh" -#include "../lib/bind-shader-params.fxh" - - -// Yes, the WIDTH/HEIGHT/SIZE defines are kinda weird. -// Yes, we have to have them or something similar. This is for D3D11 which -// returns (0, 0) when you call tex2Dsize() on the pass's render target. - - -// Pass 0 Buffer (cropPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is in interlacingPass -// electronBeamPass -> beamConvergencePass -// deinterlacePass -> phosphorMaskPass -// brightpassPass -> bloomHorizontalPass -// #define TEX_CROP_WIDTH content_size.x -// #define TEX_CROP_HEIGHT content_size.y -// #define TEX_CROP_SIZE int2(TEX_CROP_WIDTH, TEX_CROP_HEIGHT) -// texture2D texCrop { -// Width = TEX_CROP_WIDTH; -// Height = TEX_CROP_HEIGHT; - -// Format = RGBA16; -// }; -// sampler2D samplerCrop { Texture = texCrop; }; - - -// Pass 1 Buffer (interlacingPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is in electronBeamPass -// beamConvergencPass -> freezeFramePass -// phosphorMaskPass -> bloomHorizontalPass -// #define TEX_INTERLACED_WIDTH content_size.x -// #define TEX_INTERLACED_HEIGHT content_size.y -// #define TEX_INTERLACED_SIZE int2(TEX_INTERLACED_WIDTH, TEX_INTERLACED_HEIGHT) -// texture2D texInterlaced { -// Width = TEX_INTERLACED_WIDTH; -// Height = TEX_INTERLACED_HEIGHT; - -// Format = RGBA16; -// }; -// sampler2D samplerInterlaced { Texture = texInterlaced; }; - -// Pass 2 Buffer (electronBeamPass) -// Last usage is in beamConvergencePass - - -#define TEX_PREBLUR_VERT_WIDTH content_size.x -#define TEX_PREBLUR_VERT_HEIGHT content_size.y -static 
const uint2 TEX_PREBLUR_SIZE = uint2(TEX_PREBLUR_VERT_WIDTH, TEX_PREBLUR_VERT_HEIGHT); -texture2D texPreblurVert < pooled = true; > { - Width = TEX_PREBLUR_VERT_WIDTH; - Height = TEX_PREBLUR_VERT_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerPreblurVert { Texture = texPreblurVert; }; - -#define TEX_PREBLUR_HORIZ_WIDTH content_size.x -#define TEX_PREBLUR_HORIZ_HEIGHT content_size.y -static const uint2 TEX_PREBLUR_SIZE = uint2(TEX_PREBLUR_HORIZ_WIDTH, TEX_PREBLUR_HORIZ_HEIGHT); -texture2D texPreblurHoriz < pooled = true; > { - Width = TEX_PREBLUR_HORIZ_WIDTH; - Height = TEX_PREBLUR_HORIZ_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerPreblurHoriz { Texture = texPreblurHoriz; }; - - -#define TEX_BEAMDIST_WIDTH num_beamdist_color_samples -#define TEX_BEAMDIST_HEIGHT num_beamdist_dist_samples -#define TEX_BEAMDIST_SIZE int2(TEX_BEAMDIST_WIDTH, TEX_BEAMDIST_HEIGHT) -texture2D texBeamDist < pooled = false; > { - Width = TEX_BEAMDIST_WIDTH; - Height = TEX_BEAMDIST_HEIGHT; - - - Format = RGB10A2; -}; -sampler2D samplerBeamDist { - Texture = texBeamDist; - AddressV = WRAP; -}; - - -// Pass 2 Buffer (electronBeamPass) -// Last usage is in beamConvergencePass -#define TEX_ELECTRONBEAMS_WIDTH content_size.x -#define TEX_ELECTRONBEAMS_HEIGHT content_size.y -#define TEX_ELECTRONBEAMS_SIZE int2(TEX_ELECTRONBEAMS_WIDTH, TEX_ELECTRONBEAMS_HEIGHT) -texture2D texElectronBeams < pooled = true; > { - Width = TEX_ELECTRONBEAMS_WIDTH; - Height = TEX_ELECTRONBEAMS_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerElectronBeams { - Texture = texElectronBeams; - - AddressU = BORDER; - AddressV = BORDER; -}; -// #define texElectronBeams texCrop -// #define samplerElectronBeams samplerCrop - - -// Pass 3 Buffer (beamConvergencPass) -// Last usage is freezeFramePass -#define TEX_BEAMCONVERGENCE_WIDTH content_size.x -#define TEX_BEAMCONVERGENCE_HEIGHT content_size.y -#define TEX_BEAMCONVERGENCE_SIZE int2(TEX_BEAMCONVERGENCE_WIDTH, TEX_BEAMCONVERGENCE_HEIGHT) -texture2D 
texBeamConvergence < pooled = true; > { - Width = TEX_BEAMCONVERGENCE_WIDTH; - Height = TEX_BEAMCONVERGENCE_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBeamConvergence { Texture = texBeamConvergence; }; -// #define texBeamConvergence texInterlaced -// #define samplerBeamConvergence samplerInterlaced - - -/* -// Pass 4 Buffer (bloomApproxPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is in brightpassPass -#define TEX_BLOOMAPPROX_WIDTH 320 -#define TEX_BLOOMAPPROX_HEIGHT 240 -#define TEX_BLOOMAPPROX_SIZE int2(TEX_BLOOMAPPROX_WIDTH, TEX_BLOOMAPPROX_HEIGHT) -texture2D texBloomApprox { - Width = TEX_BLOOMAPPROX_WIDTH; - Height = TEX_BLOOMAPPROX_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBloomApprox { Texture = texBloomApprox; }; -*/ - -// Pass 4a Buffer (bloomApproxVerticalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is in brightpassPass -#define TEX_BLOOMAPPROXVERT_WIDTH content_size.x -// #define TEX_BLOOMAPPROXVERT_HEIGHT 240 -#define TEX_BLOOMAPPROXVERT_HEIGHT int(content_size.y / bloomapprox_downsizing_factor) -#define TEX_BLOOMAPPROXVERT_SIZE int2(TEX_BLOOMAPPROXVERT_WIDTH, TEX_BLOOMAPPROXVERT_HEIGHT) -texture2D texBloomApproxVert < pooled = true; > { - Width = TEX_BLOOMAPPROXVERT_WIDTH; - Height = TEX_BLOOMAPPROXVERT_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBloomApproxVert { Texture = texBloomApproxVert; }; - -// Pass 4b Buffer (bloomApproxHorizontalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is in brightpassPass -// #define TEX_BLOOMAPPROXHORIZ_WIDTH 320 -// #define TEX_BLOOMAPPROXHORIZ_HEIGHT 240 -#define TEX_BLOOMAPPROXHORIZ_WIDTH int(content_size.x / bloomapprox_downsizing_factor) -#define TEX_BLOOMAPPROXHORIZ_HEIGHT TEX_BLOOMAPPROXVERT_HEIGHT -#define TEX_BLOOMAPPROXHORIZ_SIZE int2(TEX_BLOOMAPPROXHORIZ_WIDTH, 
TEX_BLOOMAPPROXHORIZ_HEIGHT) -texture2D texBloomApproxHoriz < pooled = true; > { - Width = TEX_BLOOMAPPROXHORIZ_WIDTH; - Height = TEX_BLOOMAPPROXHORIZ_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBloomApproxHoriz { Texture = texBloomApproxHoriz; }; - -// Pass 5 Buffer (blurVerticalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is blurHorizontalPass -#define TEX_BLURVERTICAL_WIDTH TEX_BLOOMAPPROXHORIZ_WIDTH -#define TEX_BLURVERTICAL_HEIGHT TEX_BLOOMAPPROXHORIZ_HEIGHT -#define TEX_BLURVERTICAL_SIZE int2(TEX_BLURVERTICAL_WIDTH, TEX_BLURVERTICAL_HEIGHT) -texture2D texBlurVertical < pooled = true; > { - Width = TEX_BLURVERTICAL_WIDTH; - Height = TEX_BLURVERTICAL_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBlurVertical { Texture = texBlurVertical; }; - - -// Pass 6 Buffer (blurHorizontalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is bloomHorizontalPass -#define TEX_BLURHORIZONTAL_WIDTH TEX_BLOOMAPPROXHORIZ_WIDTH -#define TEX_BLURHORIZONTAL_HEIGHT TEX_BLOOMAPPROXHORIZ_HEIGHT -#define TEX_BLURHORIZONTAL_SIZE int2(TEX_BLURHORIZONTAL_WIDTH, TEX_BLURHORIZONTAL_HEIGHT) -texture2D texBlurHorizontal < pooled = true; > { - Width = TEX_BLURHORIZONTAL_WIDTH; - Height = TEX_BLURHORIZONTAL_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBlurHorizontal { Texture = texBlurHorizontal; }; - - -// Pass 7 (deinterlacePass) -// Last usage is phosphorMaskPass -#define TEX_DEINTERLACE_WIDTH content_size.x -#define TEX_DEINTERLACE_HEIGHT content_size.y -#define TEX_DEINTERLACE_SIZE int2(TEX_DEINTERLACE_WIDTH, TEX_DEINTERLACE_HEIGHT) -#if _DX9_ACTIVE == 0 - texture2D texDeinterlace < pooled = true; > { - Width = TEX_DEINTERLACE_WIDTH; - Height = TEX_DEINTERLACE_HEIGHT; - - Format = RGBA16; - }; - sampler2D samplerDeinterlace { Texture = texDeinterlace; }; -#else - #define texDeinterlace texElectronBeams - #define samplerDeinterlace 
samplerElectronBeams -#endif - -// Pass 8 (freezeFramePass) -// Do not condition this on __RENDERER__. It will not work if another -// pass corrupts it. -#define TEX_FREEZEFRAME_WIDTH content_size.x -#define TEX_FREEZEFRAME_HEIGHT content_size.y -#define TEX_FREEZEFRAME_SIZE int2(TEX_FREEZEFRAME_WIDTH, TEX_FREEZEFRAME_HEIGHT -texture2D texFreezeFrame < pooled = false; > { - Width = TEX_FREEZEFRAME_WIDTH; - Height = TEX_FREEZEFRAME_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerFreezeFrame { Texture = texFreezeFrame; }; - - -// Pass 10 Mask Texture (phosphorMaskResizeHorizontalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -#define TEX_PHOSPHORMASK_WIDTH content_size.x -#define TEX_PHOSPHORMASK_HEIGHT content_size.y -#define TEX_PHOSPHORMASKL_SIZE int2(TEX_PHOSPHORMASK_WIDTH, TEX_PHOSPHORMASK_HEIGHT) -texture2D texPhosphorMask < pooled = false; > { - Width = TEX_PHOSPHORMASK_WIDTH; - Height = TEX_PHOSPHORMASK_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerPhosphorMask { Texture = texPhosphorMask; }; - - -// Pass 11 Buffer (phosphorMaskPass) -// Last usage is bloomHorizontalPass -#define TEX_MASKEDSCANLINES_WIDTH content_size.x -#define TEX_MASKEDSCANLINES_HEIGHT content_size.y -#define TEX_MASKEDSCANLINES_SIZE int2(TEX_MASKEDSCANLINES_WIDTH, TEX_MASKEDSCANLINES_HEIGHT) - -#if _DX9_ACTIVE == 0 - texture2D texMaskedScanlines < pooled = true; > { - Width = TEX_MASKEDSCANLINES_WIDTH; - Height = TEX_MASKEDSCANLINES_HEIGHT; - - Format = RGBA16; - }; - sampler2D samplerMaskedScanlines { Texture = texMaskedScanlines; }; -#else - #define texMaskedScanlines texBeamConvergence - #define samplerMaskedScanlines samplerBeamConvergence -#endif - - -// Pass 12 Buffer (brightpassPass) -// Last usage is bloomHorizontalPass -#define TEX_BRIGHTPASS_WIDTH content_size.x -#define TEX_BRIGHTPASS_HEIGHT content_size.y -#define TEX_BRIGHTPASS_SIZE int2(TEX_BRIGHTPASS_WIDTH, TEX_BRIGHTPASS_HEIGHT) - -#if _DX9_ACTIVE == 0 - 
texture2D texBrightpass < pooled = true; > { - Width = TEX_BRIGHTPASS_WIDTH; - Height = TEX_BRIGHTPASS_HEIGHT; - - Format = RGBA16; - }; - sampler2D samplerBrightpass { Texture = texBrightpass; }; -#else - #define texBrightpass texElectronBeams - #define samplerBrightpass samplerElectronBeams -#endif - - -// Pass 13 Buffer (bloomVerticalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is bloomHorizontalPass -#define TEX_BLOOMVERTICAL_WIDTH content_size.x -#define TEX_BLOOMVERTICAL_HEIGHT content_size.y -#define TEX_BLOOMVERTICAL_SIZE int2(TEX_BLOOMVERTICAL_WIDTH, TEX_BLOOMVERTICAL_HEIGHT) -texture2D texBloomVertical < pooled = true; > { - Width = TEX_BLOOMVERTICAL_WIDTH; - Height = TEX_BLOOMVERTICAL_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBloomVertical { Texture = texBloomVertical; }; - - -// Pass 14 Buffer (bloomHorizontalPass) -// Cannot be conditioned on __RENDERER__ b/c there are no -// available buffers of the same size -// Last usage is geometryPass -#define TEX_BLOOMHORIZONTAL_WIDTH content_size.x -#define TEX_BLOOMHORIZONTAL_HEIGHT content_size.y -#define TEX_BLOOMHORIZONTAL_SIZE int2(TEX_BLOOMHORIZONTAL_WIDTH, TEX_BLOOMHORIZONTAL_HEIGHT) -texture2D texBloomHorizontal < pooled = true; > { - Width = TEX_BLOOMHORIZONTAL_WIDTH; - Height = TEX_BLOOMHORIZONTAL_HEIGHT; - - Format = RGBA16; -}; -sampler2D samplerBloomHorizontal { Texture = texBloomHorizontal; }; - - -// Pass 15 Buffer (geometryPass) -// Last usage is uncropPass -#define TEX_GEOMETRY_WIDTH content_size.x -#define TEX_GEOMETRY_HEIGHT content_size.y -#define TEX_GEOMETRY_SIZE int2(TEX_GEOMETRY_WIDTH, TEX_GEOMETRY_HEIGHT) - -#if _DX9_ACTIVE == 0 - texture2D texGeometry < pooled = true; > { - Width = TEX_GEOMETRY_WIDTH; - Height = TEX_GEOMETRY_HEIGHT; - - Format = RGBA16; - }; - sampler2D samplerGeometry { Texture = texGeometry; }; -#else - #define texGeometry texElectronBeams - #define samplerGeometry samplerElectronBeams 
-#endif - -#endif // _SHARED_OBJECTS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt-royale/version-number.fxh b/data/resources/shaders/reshade/Shaders/crt-royale/version-number.fxh deleted file mode 100644 index 79d17d2fd..000000000 --- a/data/resources/shaders/reshade/Shaders/crt-royale/version-number.fxh +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _VERSION_NUMBER_H -#define _VERSION_NUMBER_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2022 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -#define MAJOR_VERSION 2 -#define MINOR_VERSION 1 -#define PATCH_VERSION 0 - -// Yes, both sibling preprocessor functions are necessary. -// Don't "simplify" this, or the substitution won't work. -#define BUILD_DOT_VERSION_(mav, miv, pav) #mav "." #miv "." 
#pav -#define BUILD_DOT_VERSION(mav, miv, pav) BUILD_DOT_VERSION_(mav, miv, pav) -#define DOT_VERSION_STR BUILD_DOT_VERSION(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION) - -// Again, yes, both sibling preprocessor functions are necessary. -// Don't "simplify" this, or the substitution won't work. -#define BUILD_UNDERSCORE_VERSION_(prefix, mav, miv, pav) prefix ## _ ## mav ## _ ## miv ## _ ## pav -#define BUILD_UNDERSCORE_VERSION(p, mav, miv, pav) BUILD_UNDERSCORE_VERSION_(p, mav, miv, pav) -#define APPEND_VERSION_SUFFIX(prefix) BUILD_UNDERSCORE_VERSION(prefix, MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION) - - -#endif // _VERSION_NUMBER_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-consumer.fx b/data/resources/shaders/reshade/Shaders/crt/crt-consumer.fx deleted file mode 100644 index 2b8f1d5c0..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-consumer.fx +++ /dev/null @@ -1,797 +0,0 @@ -#include "ReShade.fxh" - - -/* - CRT-Consumer - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -*/ - - - -uniform float PRE_SCALE < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 4.0; - ui_step = 0.1; - ui_label = "Pre-Scale Sharpening"; -> = 1.5; - -uniform float blurx < - ui_type = "drag"; - ui_min = -4.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Convergence X"; -> = 0.25; - -uniform float blury < - ui_type = "drag"; - ui_min = -4.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Convergence Y"; -> = -0.1; - -uniform float warpx < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.12; - ui_step = 0.01; - ui_label = " Curvature X"; -> = 0.03; - -uniform float warpy < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.12; - ui_step = 0.01; - ui_label = " Curvature Y"; -> = 0.04; - -uniform float corner < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.10; - ui_step = 0.01; - ui_label = " Corner size"; -> = 0.03; - -uniform float smoothness < - ui_type = "drag"; - ui_min = 100.0; - ui_max = 600.0; - ui_step = 5.0; - ui_label = " Border Smoothness"; -> = 400.0; - -uniform bool inter < - ui_type = "radio"; - ui_label = "Interlacing Toggle"; -> = true; - -uniform float Downscale < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 1.; - ui_label = "Interlacing Downscale Scanlines"; -> = 2.0; - -uniform float scanlow < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 15.0; - ui_step = 1.0; - ui_label = "Beam low"; -> = 6.0; - -uniform float scanhigh < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 15.0; - ui_step = 1.0; - ui_label = "Beam high"; -> = 8.0; - -uniform float beamlow < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.5; - ui_step = 0.05; - ui_label = "Scanlines dark"; -> = 1.45; - -uniform float beamhigh < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.5; - ui_step = 0.05; - ui_label = "Scanlines bright"; -> = 1.05; - -uniform float preserve < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Protect White On Masks"; -> = 0.98; - -uniform float brightboost1 < - ui_type = "drag"; - ui_min = 0.0; - 
ui_max = 3.0; - ui_step = 0.05; - ui_label = "Bright boost dark pixels"; -> = 1.25; - -uniform float brightboost2 < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 0.05; - ui_label = "Bright boost bright pixels"; -> = 1.0; - -uniform float glow < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 6.0; - ui_step = 1.0; - ui_label = "Glow pixels per axis"; -> = 3.0; - -uniform float quality < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Glow quality"; -> = 1.0; - -uniform float glow_str < - ui_type = "drag"; - ui_min = 0.0001; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Glow intensity"; -> = 0.3; - -uniform float nois < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 32.0; - ui_step = 1.0; - ui_label = "Add Noise"; -> = 0.0; - -uniform float postbr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.5; - ui_step = 0.02; - ui_label = "Post Brightness"; -> = 1.0; - -uniform float palette_fix < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Palette Fixes. 
Sega, PUAE Atari ST dark colors"; -> = 0.0; - -uniform float Shadowmask < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 8.0; - ui_step = 1.; - ui_label = "Mask Type"; -> = 0.0; - -uniform float masksize < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Mask Size"; -> = 1.0; - -uniform float MaskDark < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Mask dark"; -> = 0.2; - -uniform float MaskLight < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.1; - ui_label = "Mask light"; -> = 1.5; - -uniform float slotmask < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Slot Mask Strength"; -> = 0.0; - -uniform float slotwidth < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 6.0; - ui_step = 0.5; - ui_label = "Slot Mask Width"; -> = 2.0; - -uniform float double_slot < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Slot Mask Height: 2x1 or 4x1"; -> = 1.0; - -uniform float slotms < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Slot Mask Size"; -> = 1.0; - -uniform float GAMMA_OUT < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Gamma Out"; -> = 2.25; - -uniform float sat < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Saturation"; -> = 1.0; - -uniform float contrast < - ui_type = "drag"; - ui_min = 0.00; - ui_max = 2.00; - ui_step = 0.05; - ui_label = "Contrast, 1.0:Off"; -> = 1.0; - -uniform float WP < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 5.; - ui_label = "Color Temperature %"; -> = 0.0; - -uniform float rg < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Red-Green Tint"; -> = 0.0; - -uniform float rb < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Red-Blue Tint"; -> = 0.0; - -uniform float gr < - 
ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Green-Red Tint"; -> = 0.0; - -uniform float gb < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Green-Blue Tint"; -> = 0.0; - -uniform float br < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Blue-Red Tint"; -> = 0.0; - -uniform float bg < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Blue-Green Tint"; -> = 0.0; - -uniform bool vignette < - ui_type = "radio"; - ui_label = "Vignette On/Off"; -> = false; - -uniform float vpower < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Vignette Power"; -> = 0.15; - -uniform float vstr < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 50.0; - ui_step = 1.0; - ui_label = "Vignette strength"; -> = 40.0; - -uniform bool alloff < - ui_type = "radio"; - ui_label = "Switch off shader"; -> = false; - - -uniform float FrameCount < source = "framecount"; >; -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform float ViewportX < source = "viewportx"; >; -uniform float ViewportY < source = "viewporty"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float ViewportHeight < source = "viewportheight"; >; -uniform float2 ViewportOffset < source = "viewportoffset"; >; -uniform float BufferWidth < source = "bufferwidth"; >; -uniform float BufferHeight < source = "bufferheight"; >; -uniform float NativeWidth < source = "nativewidth"; >; -uniform float NativeHeight < source = "nativeheight"; >; -uniform float InternalWidth < source = "internalwidth"; >; -uniform float InternalHeight < source = "internalheight"; >; - -sampler2D 
sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -#define iTime (float(FrameCount)/2.0) -#define iTimer (float(FrameCount)/60.0) - -#define SourceSize (float4(1.0/NormalizedNativePixelSize,NormalizedNativePixelSize)) -#define OutputSize (ViewportSize*BufferToViewportRatio) - -float2 Warp(float2 pos) -{ - pos = pos * 2.0 - 1.0; - pos *= float2(1.0 + (pos.y * pos.y) * warpx, 1.0 + (pos.x * pos.x) * warpy); - return pos * 0.5 + 0.5; -} - -float sw(float y, float l) -{ - float beam = lerp(scanlow, scanhigh, y); - float scan = lerp(beamlow, beamhigh, l); - float ex = y * scan; - return exp2(-beam * ex * ex); -} - -float3 mask(float2 x, float3 col, float l) -{ - x = floor(x / masksize); - - if (Shadowmask == 0.0) - { - float m = frac(x.x * 0.4999); - if (m < 0.4999) return float3(1.0, MaskDark, 1.0); - else return float3(MaskDark, 1.0, MaskDark); - } - - else if (Shadowmask == 1.0) - { - float3 Mask = float3(MaskDark, MaskDark, MaskDark); - float line = MaskLight; - float odd = 0.0; - - if (frac(x.x / 6.0) < 0.5) odd = 1.0; - if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark; - - float m = frac(x.x / 3.0); - if (m < 0.333) Mask.b = MaskLight; - else if (m < 0.666) Mask.g = MaskLight; - else Mask.r = MaskLight; - - Mask *= line; - return Mask; - } - - else if (Shadowmask == 2.0) - { - float m = frac(x.x*0.3333); - if (m < 0.3333) return float3(MaskDark, MaskDark, MaskLight); - if (m < 0.6666) return float3(MaskDark, MaskLight, MaskDark); - else return float3(MaskLight, MaskDark, MaskDark); - } - - if (Shadowmask == 3.0) - { - float m = frac(x.x * 0.5); - if (m < 0.5) return float3(1.0, 1.0, 1.0); - else return float3(MaskDark, MaskDark, MaskDark); - } - - else if (Shadowmask == 4.0) - { - float3 Mask = float3(col.rgb); - float line = MaskLight; - float odd = 0.0; - - if (frac(x.x / 4.0) < 0.5) odd = 1.0; - if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark; - - float m = frac(x.x / 2.0); - if (m 
< 0.5) { Mask.r = 1.0; Mask.b = 1.0; } - else Mask.g = 1.0; - - Mask *= line; - return Mask; - } - - else if (Shadowmask == 5.0) - { - float3 Mask = float3(1.0, 1.0, 1.0); - - if (frac(x.x / 4.0) < 0.5) - { - if (frac(x.y / 3.0) < 0.666) - { - if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0); - else Mask = float3(MaskDark, 1.0, MaskDark); - } - else Mask *= l; - } - else if (frac(x.x / 4.0) >= 0.5) - { - if (frac(x.y / 3.0) > 0.333) - { - if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0); - else Mask = float3(MaskDark, 1.0, MaskDark); - } - else Mask *= l; - } - - return Mask; - } - - else if (Shadowmask == 6.0) - { - float3 Mask = float3(MaskDark, MaskDark, MaskDark); - if (frac(x.x / 6.0) < 0.5) - { - if (frac(x.y / 4.0) < 0.75) - { - if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight; - else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight; - else Mask.b = MaskLight; - } - else Mask * l * 0.9; - } - else if (frac(x.x / 6.0) >= 0.5) - { - if (frac(x.y / 4.0) >= 0.5 || frac(x.y / 4.0) < 0.25) - { - if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight; - else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight; - else Mask.b = MaskLight; - } - else Mask * l * 0.9; - } - return Mask; - } - - else if (Shadowmask == 7.0) - { - float m = frac(x.x * 0.3333); - - if (m < 0.3333) return float3(MaskDark, MaskLight, MaskLight * col.b); //Cyan - if (m < 0.6666) return float3(MaskLight * col.r, MaskDark, MaskLight); //Magenta - else return float3(MaskLight, MaskLight * col.g, MaskDark); //Yellow - } - - else if (Shadowmask == 8.0) - { - float3 Mask = float3(MaskDark, MaskDark, MaskDark); - - float bright = MaskLight; - float left = 0.0; - if (frac(x.x / 6.0) < 0.5) left = 1.0; - - float m = frac(x.x / 3.0); - if (m < 0.333) Mask.b = 0.9; - else if (m < 0.666) Mask.g = 0.9; - else Mask.r = 0.9; - - if ((x.y % 2.0) == 1.0 && left == 1.0 || (x.y % 2.0) == 0.0 && left == 0.0) - Mask *= bright; - - return Mask; - } - - else return float3(1.0, 1.0, 1.0); -} - -float 
SlotMask(float2 pos, float3 c) -{ - if (slotmask == 0.0) return 1.0; - - pos = floor(pos / slotms); - float mx = pow(max(max(c.r, c.g), c.b), 1.33); - float mlen = slotwidth * 2.0; - float px = frac(pos.x / mlen); - float py = floor(frac(pos.y / (2.0 * double_slot)) * 2.0 * double_slot); - float slot_dark = lerp(1.0 - slotmask, 1.0 - 0.80 * slotmask, mx); - float slot = 1.0 + 0.7 * slotmask * (1.0 - mx); - - if (py == 0.0 && px < 0.5) slot = slot_dark; - else if (py == double_slot && px >= 0.5) slot = slot_dark; - - return slot; -} - -float4x4 contrastMatrix(float contrast) -{ - float t = (1.0 - contrast) / 2.0; - - return float4x4(contrast, 0, 0, 0, - 0, contrast, 0, 0, - 0, 0, contrast, 0, - t, t, t, 1); -} - -float3x3 vign(float l, float2 tex) -{ - float2 vpos = tex; - vpos *= 1.0 - vpos.xy; - - float vig = vpos.x * vpos.y * vstr; - vig = min(pow(vig, vpower), 1.0); - if (vignette == false) vig = 1.0; - - return float3x3(vig, 0, 0, - 0, vig, 0, - 0, 0, vig); -} - -float3 saturation(float3 textureColor) -{ - float luminance = length(textureColor.rgb) * 0.5775; - - float3 luminanceWeighting = float3(0.4, 0.5, 0.1); - if (luminance < 0.5) luminanceWeighting.rgb = (luminanceWeighting.rgb * luminanceWeighting.rgb) - + (luminanceWeighting.rgb * luminanceWeighting.rgb); - - luminance = dot(textureColor.rgb, luminanceWeighting); - float3 greyScaleColor = float3(luminance, luminance, luminance); - - float3 res = float3(lerp(greyScaleColor, textureColor.rgb, sat)); - return res; -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////// - -float3 glow0 (float2 texcoord, float3 col) -{ - - // the more quality, the smaller the offset and better quality, less visible glow too - float2 size = SourceSize.zw/quality; - - float3 c01; - float3 sum = float3(0.0, 0.0, 0.0); - - // glow = pixels per axis, the more the slower! 
- - for (float x = -glow; x <= glow; x = x+1.0) - { - - // multiply texture, the more far away the less pronounced - float factor = 1.0/glow; - for (float y = -glow; y <= glow; y = y+1.0) - { - - float2 offset = float2(x, y) * size; - - c01 = tex2D(sBackBuffer, texcoord + offset).rgb*factor; c01 = c01*c01; - - sum += c01; - } - } - - return (glow_str * sum / (glow * glow )) ; -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////// - -float noise(float2 co) -{ - return frac(sin(iTimer * dot(co.xy ,float2(12.9898,78.233))) * 43758.5453); -} - -float corner0(float2 coord) -{ - coord = (coord - float2(0.5, 0.5)) * 1.0 + float2(0.5, 0.5); - coord = min(coord, float2(1.0, 1.0) - coord) * float2(1.0, SourceSize.y / SourceSize.x); - - float2 cdist = float2(corner, corner); - coord = (cdist - min(coord, cdist)); - float dist = sqrt(dot(coord, coord)); - - return clamp((cdist.x - dist) * smoothness, 0.0, 1.0); -} - -static const float3x3 D65_to_XYZ = float3x3( - 0.4306190, 0.2220379, 0.0201853, - 0.3415419, 0.7066384, 0.1295504, - 0.1783091, 0.0713236, 0.9390944); - -static const float3x3 XYZ_to_D65 = float3x3( - 3.0628971, -0.9692660, 0.0678775, - -1.3931791, 1.8760108, -0.2288548, - -0.4757517, 0.0415560, 1.0693490); - -static const float3x3 D50_to_XYZ = float3x3( - 0.4552773, 0.2323025, 0.0145457, - 0.3675500, 0.7077956, 0.1049154, - 0.1413926, 0.0599019, 0.7057489); - -static const float3x3 XYZ_to_D50 = float3x3( - 2.9603944, -0.9787684, 0.0844874, - -1.4678519, 1.9161415, -0.2545973, - -0.4685105, 0.0334540, 1.4216174); - - -float4 PS_CRT_CONSUMER(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - float2 pos = Warp(vTexCoord.xy); - float2 tex_size = SourceSize.xy; - - float2 pC4 = (pos + 0.5/tex_size); - float2 fp = frac(pos * tex_size); - if (inter == false && tex_size.y > 400.0){ fp.y = frac(pos.y * tex_size.y*1.0/Downscale);} - - float4 res = float4(1.0, 1.0, 1.0, 1.0); - - if (alloff == 
true) - res = tex2D(sBackBuffer, pC4); - else - { - - float2 texel = pos * tex_size; - float2 texel_floored = floor(texel); - - float scale = PRE_SCALE; - float region_range = 0.5 - 0.5 / scale; - - // Figure out where in the texel to sample to get correct pre-scaled bilinear. - // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually. - - float2 center_dist = fp - 0.5; - - float2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5; - - float2 mod_texel = texel_floored + fpp; - float2 coords = mod_texel / SourceSize.xy; - - float3 sample1 = tex2D(sBackBuffer, float2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb; - float3 sample2 = tex2D(sBackBuffer, coords).rgb; - float3 sample3 = tex2D(sBackBuffer, float2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb; - - float3 color = float3(sample1.r * 0.5 + sample2.r * 0.5, - sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25, - sample2.b * 0.5 + sample3.b * 0.5); - if (palette_fix != 0.0) - { - if (palette_fix == 1.0) color = color* 1.0667; - else if (palette_fix == 2.0) color = color * 2.0; - } - - //COLOR TEMPERATURE FROM GUEST.R-DR.VENOM - if (WP != 0.0) - { - float3 warmer = mul(color, D50_to_XYZ); - warmer = mul(warmer, XYZ_to_D65); - - float3 cooler = mul(color, D65_to_XYZ); - cooler = mul(cooler, XYZ_to_D50); - - float m = abs(WP) / 100.0; - float3 comp = (WP < 0.0) ? 
cooler : warmer; - comp = clamp(comp, 0.0, 1.0); - - color = float3(lerp(color, comp, m)); - } - - float3x3 hue = float3x3 (1., rg, rb, //red tint - gr, 1., gb, //green tint - br, bg, 1.); //blue tint - - color = mul(color, hue); - - color = (2.0*pow(color,float3(2.8, 2.8, 2.8))) - pow(color,float3(3.6, 3.6, 3.6)); - - float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1; - - float f = frac(fp.y -0.5); - - if (inter == true && tex_size.y > 400.0) color = color; - else - {color = color * sw(f,lum) + color * sw (1.0-f,lum);} - - float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1; - - - color *= lerp(mask((vTexCoord * OutputSize.xy), color,lum1), float3(1.0, 1.0, 1.0), lum1*preserve); - - - if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color); - - color *= lerp(brightboost1, brightboost2, max(max(color.r, color.g), color.b)); - - - - color = pow(color,float3(1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT)); - if (glow_str != 0.0) color += glow0(coords,color); - - if (sat != 1.0) color = saturation(color); - if (corner != 0.0) color *= corner0(pC4); - if (nois != 0.0) color *= 1.0 + noise(coords * 2.0) / nois; - - color *= lerp(1.0, postbr, lum); - res = float4(color, 1.0); - if (contrast != 1.0) res = mul(res, contrastMatrix(contrast)); - if (inter == true && SourceSize.y > 400.0 && frac(iTime) < 0.5) res = res * 0.95; - res.rgb = mul(res.rgb, vign(lum, vTexCoord)); - - } - - return res; -} - - - -technique CRT_CONSUMER -{ - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_CRT_CONSUMER; - } -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-cyclon.fx b/data/resources/shaders/reshade/Shaders/crt/crt-cyclon.fx deleted file mode 100644 index edf74da55..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-cyclon.fx +++ /dev/null @@ -1,545 +0,0 @@ -#include "ReShade.fxh" - - -// DariusG presents - -// 'crt-Cyclon' - -// Why? Because it's speedy! 
- -// A super-fast shader based on the magnificent crt-Geom, optimized for full speed -// on a Xiaomi Note 3 Pro cellphone (around 170(?) gflops gpu or so) - -// This shader uses parts from: -// crt-Geom (scanlines) -// Quillez (main filter) -// Grade (some primaries) -// Dogway's inverse Gamma -// Masks-slot-color handling, tricks etc are mine. - -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or (at your option) -// any later version. - - - -uniform float SCANLINE < - ui_type = "drag"; - ui_min = 0.2; - ui_max = 0.6; - ui_step = 0.05; - ui_label = "Scanline Weight"; -> = 0.3; - -uniform bool INTERLACE < - ui_type = "radio"; - ui_label = "Interlacing On/Off"; -> = 1.0; - -uniform float bogus_msk < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.0; - ui_step = 0.0; - ui_label = " [ MASK SETTINGS ] "; -> = 0.0; - -uniform float M_TYPE < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Mask Type: -1:None, 0:CGWG, 1:RGB"; -> = 1.0; - -uniform float MSIZE < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 2.0; - ui_step = 1.0; - ui_label = "Mask Size"; -> = 1.0; - -uniform bool SLOT < - ui_type = "radio"; - ui_label = "Slot Mask On/Off"; -> = 1.0; - -uniform float SLOTW < - ui_type = "drag"; - ui_min = 2.0; - ui_max = 3.0; - ui_step = 1.0; - ui_label = "Slot Mask Width"; -> = 3.0; - -uniform float BGR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Subpixels BGR/RGB"; -> = 0.0; - -uniform float Maskl < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Mask Brightness Dark"; -> = 0.3; - -uniform float Maskh < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Mask Brightness Bright"; -> = 0.75; - -uniform float bogus_geom < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 
0.0; - ui_step = 0.0; - ui_label = " [ GEOMETRY SETTINGS ] "; -> = 0.0; - -uniform bool bzl < - ui_type = "radio"; - ui_label = "Bezel On/Off"; -> = 1.0; - -uniform float ambient < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Ambient Light"; -> = 0.40; - -uniform float zoomx < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Zoom Image X"; -> = 0.0; - -uniform float zoomy < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Zoom Image Y"; -> = 0.0; - -uniform float centerx < - ui_type = "drag"; - ui_min = -5.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Image Center X"; -> = 0.0; - -uniform float centery < - ui_type = "drag"; - ui_min = -5.0; - ui_max = 5.0; - ui_step = 0.05; - ui_label = "Image Center Y"; -> = 0.0; - -uniform float WARPX < - ui_type = "drag"; - ui_min = 0.00; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature Horizontal"; -> = 0.02; - -uniform float WARPY < - ui_type = "drag"; - ui_min = 0.00; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Curvature Vertical"; -> = 0.01; - -uniform bool vig < - ui_type = "radio"; - ui_label = "Vignette On/Off"; -> = 1.0; - -uniform float bogus_col < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.0; - ui_step = 0.0; - ui_label = " [ COLOR SETTINGS ] "; -> = 0.0; - -uniform float BR_DEP < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.333; - ui_step = 0.01; - ui_label = "Scan/Mask Brightness Dependence"; -> = 0.2; - -uniform float c_space < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 3.0; - ui_step = 1.0; - ui_label = "Color Space: sRGB,PAL,NTSC-U,NTSC-J"; -> = 0.0; - -uniform float EXT_GAMMA < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "External Gamma In (Glow etc)"; -> = 0.0; - -uniform float SATURATION < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Saturation"; -> = 1.0; - -uniform float BRIGHTNESS_ < - 
ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "Brightness, Sega fix:1.06"; -> = 1.0; - -uniform float BLACK < - ui_type = "drag"; - ui_min = -0.20; - ui_max = 0.20; - ui_step = 0.01; - ui_label = "Black Level"; -> = 0.0; - -uniform float RG < - ui_type = "drag"; - ui_min = -0.25; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Green <-to-> Red Hue"; -> = 0.0; - -uniform float RB < - ui_type = "drag"; - ui_min = -0.25; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Blue <-to-> Red Hue"; -> = 0.0; - -uniform float GB < - ui_type = "drag"; - ui_min = -0.25; - ui_max = 0.25; - ui_step = 0.01; - ui_label = "Blue <-to-> Green Hue"; -> = 0.0; - -uniform float bogus_con < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.0; - ui_step = 0.0; - ui_label = " [ CONVERGENCE SETTINGS ] "; -> = 0.0; - -uniform float C_STR < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.5; - ui_step = 0.05; - ui_label = "Convergence Overall Strength"; -> = 0.0; - -uniform float CONV_R < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Convergence Red X-Axis"; -> = 0.0; - -uniform float CONV_G < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Convergence Green X-axis"; -> = 0.0; - -uniform float CONV_B < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Convergence Blue X-Axis"; -> = 0.0; - -uniform float POTATO < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Potato Boost(Simple Gamma, adjust Mask)"; -> = 0.0; - - -#define blck ((1.0)/(1.0-BLACK)) -#define pi 3.1415926535897932384626433 - -uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 InternalPixelSize < source = "internal_pixel_size"; >; -uniform float2 NativePixelSize < source = "native_pixel_size"; >; -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; -uniform float2 
NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float UpscaleMultiplier < source = "upscale_multiplier"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform int FrameCount < source = "framecount"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;}; - -texture tBezel < source = "crt-cyclon/bezel.png"; > -{ - Width = BUFFER_WIDTH; - Height = BUFFER_HEIGHT; - MipLevels = 1; -}; - -sampler sBezel { Texture = tBezel; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;}; - -float3 Mask(float2 pos, float CGWG) -{ - float3 mask = float3(CGWG,CGWG,CGWG); - - -if (M_TYPE == 0.0){ - - if (POTATO == 1.0) { float pot = (1.0-CGWG)*sin(pos.x*pi)+CGWG; return float3(pot,pot,pot); } - else{ - float m = frac(pos.x*0.5); - - if (m<0.5) mask.rb = float2(1.0,1.0); - else mask.g = 1.0; - - return mask; - } -} - -if (M_TYPE == 1.0){ - - if (POTATO == 1.0) { float pot = (1.0-CGWG)*sin(pos.x*pi*0.6667)+CGWG; return float3(pot,pot,pot );} - else{ - float m = frac(pos.x*0.3333); - - if (m<0.3333) mask.rgb = (BGR == 0.0) ? float3(mask.r, mask.g, 1.0) : float3(1.0, mask.g, mask.b); - else if (m<0.6666) mask.g = 1.0; - else mask.rgb = (BGR == 0.0) ? float3(1.0, mask.g, mask.b) : float3(mask.r, mask.g, 1.0); - return mask; - } -} - else return float3(1.0,1.0,1.0); - -} - -float scanlineWeights(float distance, float3 color, float x) - { - // "wid" controls the width of the scanline beam, for each RGB - // channel The "weights" lines basically specify the formula - // that gives you the profile of the beam, i.e. the intensity as - // a function of distance from the vertical center of the - // scanline. In this case, it is gaussian if width=2, and - // becomes nongaussian for larger widths. Ideally this should - // be normalized so that the integral across the beam is - // independent of its width. 
That is, for a narrower beam - // "weights" should have a higher peak at the center of the - // scanline than for a wider beam. - float wid = SCANLINE + 0.15 * dot(color, float3(0.25-0.8*x, 0.25-0.8*x, 0.25-0.8*x)); //0.8 vignette strength - float weights = distance / wid; - return 0.4 * exp(-weights * weights ) / wid; - } - -#define pwr float3(1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2,1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2,1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2) -// Returns gamma corrected output, compensated for scanline+mask embedded gamma -float3 inv_gamma(float3 col, float3 power) -{ - float3 cir = col-1.0; - cir *= cir; - col = lerp(sqrt(col),sqrt(1.0-cir),power); - return col; -} - -// standard 6500k -static const float3x3 PAL = float3x3( -1.0740 , -0.0574 , -0.0119 , -0.0384 , 0.9699 , -0.0059 , --0.0079 , 0.0204 , 0.9884 ); - -// standard 6500k -static const float3x3 NTSC = float3x3( -0.9318 , 0.0412 , 0.0217 , -0.0135 , 0.9711 , 0.0148 , -0.0055 , -0.0143 , 1.0085 ); - -// standard 8500k -static const float3x3 NTSC_J = float3x3( -0.9501 , -0.0431 , 0.0857 , -0.0265 , 0.9278 , 0.0432 , -0.0011 , -0.0206 , 1.3153 ); - -float3 slot(float2 pos) -{ - float h = frac(pos.x/SLOTW); - float v = frac(pos.y); - - float odd; - if (v<0.5) odd = 0.0; else odd = 1.0; - -if (odd == 0.0) - {if (h<0.5) return float3(0.5,0.5,0.5); else return float3(1.5,1.5,1.5);} - -else if (odd == 1.0) - {if (h<0.5) return float3(1.5,1.5,1.5); else return float3(0.5,0.5,0.5);} -} - -float2 Warp(float2 pos) -{ - pos = pos*2.0-1.0; - pos *= float2(1.0+pos.y*pos.y*WARPX, 1.0+pos.x*pos.x*WARPY); - pos = pos*0.5+0.5; - - return pos; -} - -uniform float2 BufferHeight < source = "bufferheight"; >; - -float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target -{ - float4 SourceSize = float4(1.0 / NormalizedNativePixelSize, NormalizedNativePixelSize); - float2 OutputSize = ViewportSize; - - float2 scale = BufferViewportRatio.xy; - float2 warpcoords = 
(vTexCoord-float2(0.5,0.5)) * BufferViewportRatio + float2(0.5,0.5); - -// Hue matrix inside main() to avoid GLES error -float3x3 hue = float3x3( - 1.0, -RG, -RB, - RG, 1.0, -GB, - RB, GB, 1.0 -); -// zoom in and center screen for bezel - float2 pos = Warp((vTexCoord*float2(1.0-zoomx,1.0-zoomy)-float2(centerx,centery)/100.0)); - float4 bez = float4(0.0,0.0,0.0,0.0); -// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*SourceSize.xy/OriginalSize.xy*0.97+float2(0.015,0.015)); -// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*scale*0.97+float2(0.015,0.015)); - if (bzl == true) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015)); // This fix Bezel to adjust to Game's aspect ratio. - - bez.rgb = lerp(bez.rgb, float3(ambient,ambient,ambient),0.5); - - float2 bpos = pos; - float2 ps = SourceSize.zw; - float2 dx = float2(ps.x,0.0); -// Quilez - float2 ogl2 = pos*SourceSize.xy; - float2 i = floor(pos*SourceSize.xy) + 0.5; - float f = ogl2.y - i.y; - pos.y = (i.y + 4.0*f*f*f)*ps.y; // smooth - pos.x = lerp(pos.x, i.x*ps.x, 0.2); - -// Convergence - float3 res0 = tex2D(sBackBuffer,pos).rgb; - float resr = tex2D(sBackBuffer,pos + dx*CONV_R).r; - float resb = tex2D(sBackBuffer,pos + dx*CONV_B).b; - float resg = tex2D(sBackBuffer,pos + dx*CONV_G).g; - - float3 res = float3( res0.r*(1.0-C_STR) + resr*C_STR, - res0.g*(1.0-C_STR) + resg*C_STR, - res0.b*(1.0-C_STR) + resb*C_STR - ); -// Vignette - float x = 0.0; - if (vig == true){ - x = vTexCoord.x*scale.x-0.5; -// x = vTexCoord.x-0.5; - x = x*x;} - - float l = dot(float3(BR_DEP,BR_DEP,BR_DEP),res); - - // Color Spaces - if(EXT_GAMMA != 1.0) res *= res; - if (c_space != 0.0) { - if (c_space == 1.0) res = mul(PAL,res); - if (c_space == 2.0) res = mul(NTSC,res); - if (c_space == 3.0) res = mul(NTSC_J,res); -// Apply CRT-like luminances - res /= float3(0.24,0.69,0.07); - res *= float3(0.29,0.6,0.11); - res = clamp(res,0.0,1.0); - } - float s = frac(bpos.y*SourceSize.y-0.5); -// handle interlacing - if (SourceSize.y > 400.0) - { - s = 
frac(bpos.y*SourceSize.y/2.0-0.5); -// if (INTERLACE == 1.0) s = mod(float(FrameCount),2.0) < 1.0 ? s: s+0.5; - if (INTERLACE == true) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5; - } -// Calculate CRT-Geom scanlines weight and apply - float weight = scanlineWeights(s, res, x); - float weight2 = scanlineWeights(1.0-s, res, x); - res *= weight + weight2; - -// Masks - float2 xy = vTexCoord*OutputSize.xy*scale/MSIZE; -// float2 xy = vTexCoord*OutputSize.xy/MSIZE; - float CGWG = lerp(Maskl, Maskh, l); - res *= Mask(xy, CGWG); -// Apply slot mask on top of Trinitron-like mask - if (SLOT == true) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG); - - if (POTATO == 0.0) res = inv_gamma(res,pwr); - else {res = sqrt(res); res *= lerp(1.3,1.1,l);} - -// Saturation - float lum = dot(float3(0.29,0.60,0.11),res); - res = lerp(float3(lum,lum,lum),res,SATURATION); - -// Brightness, Hue and Black Level - res *= BRIGHTNESS_; - res = mul(hue,res); - res -= float3(BLACK,BLACK,BLACK); - res *= blck; -// Apply bezel code, adapted from New-Pixie - if (bzl == true) - res.rgb = lerp(res.rgb, lerp(max(res.rgb, 0.0), pow( abs(bez.rgb), float3( 1.4,1.4,1.4 ) ), bez.w * bez.w), float3( 1.0,1.0,1.0 ) ); - - - return float4(res, 1.0); -} - - - -technique CRT_CYCLON -{ - pass PS_CRT_CYCLON - { - VertexShader = PostProcessVS; - PixelShader = CRT_CYCLON_PS; - } -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-geo-zfast.fx b/data/resources/shaders/reshade/Shaders/crt/crt-geo-zfast.fx deleted file mode 100644 index f9bb3418c..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-geo-zfast.fx +++ /dev/null @@ -1,150 +0,0 @@ -#include "ReShade.fxh" - -/* - zfast_crt_geo - A simple, fast CRT shader. 
- - Copyright (C) 2017 Greg Hogan (SoltanGris42) - Copyright (C) 2023 Jose Linares (Dogway) - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - -Notes: This shader does scaling with a weighted linear filter - based on the algorithm by Iñigo Quilez here: - https://iquilezles.org/articles/texture/ - but modified to be somewhat sharper. Then a scanline effect that varies - based on pixel brightness is applied along with a monochrome aperture mask. - This shader runs at ~60fps on the Chromecast HD (10GFlops) on a 1080p display. - (https://forums.libretro.com/t/android-googletv-compatible-shaders-nitpicky) - -Dogway: I modified zfast_crt.glsl shader to include screen curvature, - vignetting, round corners and phosphor*temperature. Horizontal pixel is left out - from the Quilez' algo (read above) to provide a more S-Video like horizontal blur. - The scanlines and mask are also now performed in the recommended linear light. - For this to run smoothly on GPU deprived platforms like the Chromecast and - older consoles, I had to remove several parameters and hardcode them into the shader. - Another POV is to run the shader on handhelds like the Switch or SteamDeck so they consume less battery. 
- -*/ - - -uniform float SCANLINE_WEIGHT < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 15.0; - ui_step = 0.5; - ui_label = "Scanline Amount"; -> = 7.0; - -uniform float MASK_DARK < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Mask Effect Amount"; -> = 0.5; - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float BufferWidth < source = "bufferwidth"; >; -uniform float BufferHeight < source = "bufferheight"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;}; - -struct ST_VertexOut -{ - float2 invDims : TEXCOORD1; -}; - -// Vertex shader generating a triangle covering the entire screen -void VS_CRT_Geo_zFast(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - vVARS.invDims = NormalizedNativePixelSize; -} - - -#define MSCL (BufferHeight > 1499.0 ? 
0.3333 : 0.5) -// This compensates the scanline+mask embedded gamma from the beam dynamics -#define pwr ((1.0/((-0.0325*SCANLINE_WEIGHT+1.0)*(-0.311*MASK_DARK+1.0))-1.2).xxx) - - - -// NTSC-J (D93) -> Rec709 D65 Joint Matrix (with D93 simulation) -// This is compensated for a linearization hack (RGB*RGB and then sqrt()) -static const float3x3 P22D93 = float3x3( - 1.00000, 0.00000, -0.06173, - 0.07111, 0.96887, -0.01136, - 0.00000, 0.08197, 1.07280); - - -// Returns gamma corrected output, compensated for scanline+mask embedded gamma -float3 inv_gamma(float3 col, float3 power) -{ - float3 cir = col-1.0; - cir *= cir; - col = lerp(sqrt(col),sqrt(1.0-cir),power); - return col; -} - -float2 Warp(float2 pos) -{ - pos = pos*2.0-1.0; - pos *= float2(1.0 + (pos.y*pos.y)*0.0276, 1.0 + (pos.x*pos.x)*0.0414); - return pos*0.5 + 0.5; -} - - -float4 PS_CRT_Geo_zFast(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target -{ - float2 pos = vTexCoord; - float2 xy = Warp(pos); - - float2 corn = min(xy,1.0-xy); // This is used to mask the rounded - corn.x = 0.0001/corn.x; // corners later on - - pos *= (1.0 - pos.xy); - float vig = pos.x * pos.y * 46.0; - vig = min(sqrt(vig), 1.0); - - - // Of all the pixels that are mapped onto the texel we are - // currently rendering, which pixel are we currently rendering? - float ratio_scale = xy.y / NormalizedNativePixelSize.y - 0.5; - // Snap to the center of the underlying texel. 
- float i = floor(ratio_scale) + 0.5; - - // This is just like "Quilez Scaling" but sharper - float f = ratio_scale - i; - float Y = f*f; - float p = (i + 4.0*Y*f)*vVARS.invDims.y; - - float whichmask = floor(vTexCoord.x*BufferWidth)*(-MSCL); - float mask = 1.0 + float(frac(whichmask) < MSCL)*(-MASK_DARK); - float3 colour = tex2D(sBackBuffer, float2(xy.x,p)).rgb; - - colour = max(mul(P22D93 * vig, colour*colour), 0.0.xxx); - - float scanLineWeight = (1.5 - SCANLINE_WEIGHT*(Y - Y*Y)); - - if (corn.y <= corn.x || corn.x < 0.0001 ) - colour = 0.0.xxx; - - return float4(inv_gamma(colour.rgb*lerp(scanLineWeight*mask, 1.0, colour.r*0.26667+colour.g*0.26667+colour.b*0.26667),pwr),1.0); -} - - - -technique CRT_Geo_zFast -{ - pass - { - VertexShader = VS_CRT_Geo_zFast; - PixelShader = PS_CRT_Geo_zFast; - } -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx b/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx deleted file mode 100644 index 807fffdd2..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-geom.fx +++ /dev/null @@ -1,654 +0,0 @@ -#include "ReShade.fxh" - -/* - CRT-interlaced - - Copyright (C) 2010-2012 cgwg, Themaister and DOLLS - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - (cgwg gave their consent to have the original version of this shader - distributed under the GPL in this message: - - http://board.byuu.org/viewtopic.php?p=26075#p26075 - - "Feel free to distribute my shaders under the GPL. After all, the - barrel distortion code was taken from the Curvature shader, which is - under the GPL." 
- ) - This shader variant is pre-configured with screen curvature -*/ - - -uniform float CRTgamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "CRTGeom Target Gamma"; -> = 2.4; - -uniform float monitorgamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "CRTGeom Monitor Gamma"; -> = 2.2; - -uniform float d < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = 0.1; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "CRTGeom Distance"; -> = 1.5; - -uniform bool CURVATURE < - ui_category = "Curvature"; - ui_type = "radio"; - ui_label = "CRTGeom Curvature Toggle"; -> = true; - -uniform bool invert_aspect < - ui_type = "radio"; - ui_category = "Curvature"; - ui_label = "CRTGeom Curvature Aspect Inversion"; -> = false; - -uniform float R < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = 0.1; - ui_max = 10.0; - ui_step = 0.1; - ui_label = "CRTGeom Curvature Radius"; -> = 2.0; - -uniform float cornersize < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = 0.001; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "CRTGeom Corner Size"; -> = 0.03; - -uniform float cornersmooth < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = 80.0; - ui_max = 2000.0; - ui_step = 100.0; - ui_label = "CRTGeom Corner Smoothness"; -> = 1000.0; - -uniform float x_tilt < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "CRTGeom Horizontal Tilt"; -> = 0.0; - -uniform float y_tilt < - ui_type = "drag"; - ui_category = "Curvature"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "CRTGeom Vertical Tilt"; -> = 0.0; - -uniform float overscan_x < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "CRTGeom Horiz. Overscan %"; -> = 100.0; - -uniform float overscan_y < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "CRTGeom Vert. 
Overscan %"; -> = 100.0; - -uniform float centerx < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center X"; -> = 0.00; - -uniform float centery < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center Y"; -> = 0.00; - -uniform float DOTMASK < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "CRTGeom Dot Mask Strength"; -> = 0.3; - -uniform float SHARPER < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 3.0; - ui_step = 1.0; - ui_label = "CRTGeom Sharpness"; -> = 1.0; - -uniform float scanline_weight < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 0.5; - ui_step = 0.05; - ui_label = "CRTGeom Scanline Weight"; -> = 0.3; - -uniform bool vertical_scanlines < - ui_type = "radio"; - ui_label = "CRTGeom Vertical Scanlines"; -> = false; - -uniform float lum < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "CRTGeom Luminance"; -> = 0.0; - -uniform float interlace_detect < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "CRTGeom Interlacing Simulation"; -> = 1.0; - - - -uniform float FrameCount < source = "framecount"; >; -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 InternalPixelSize < source = "internal_pixel_size"; >; -uniform float2 NativePixelSize < source = "native_pixel_size"; >; -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float UpscaleMultiplier < source = "upscale_multiplier"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float ViewportHeight < source = "viewportheight"; >; - -sampler2D 
sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;}; - -// Comment the next line to disable interpolation in linear gamma (and -// gain speed). -#define LINEAR_PROCESSING - -// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature -#define OVERSAMPLE - -// Use the older, purely gaussian beam profile; uncomment for speed -//#define USEGAUSSIAN - -// Macros. -#define FIX(c) max(abs(c), 1e-5); -#define PI 3.141592653589 - -#ifdef LINEAR_PROCESSING -# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma)) -#else -# define TEX2D(c) tex2D(sBackBuffer, (c)) -#endif - -// aspect ratio -#define aspect (invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth)) -#define overscan (float2(1.01,1.01)); - - -struct ST_VertexOut -{ - float2 sinangle : TEXCOORD1; - float2 cosangle : TEXCOORD2; - float3 stretch : TEXCOORD3; - float2 ilfac : TEXCOORD4; - float2 one : TEXCOORD5; - float mod_factor : TEXCOORD6; - float2 TextureSize : TEXCOORD7; -}; - - -float vs_intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + d*d; - float B = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d); - float C = d*d + 2.0*R*d*cosangle.x*cosangle.y; - - return (-B-sqrt(B*B-4.0*A*C))/(2.0*A); -} - -float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = vs_intersect(xy, sinangle, cosangle); - float2 point = (float2(c, c)*xy - float2(-R, -R)*sinangle) / float2(R, R); - float2 poc = point/cosangle; - - float2 tang = sinangle/cosangle; - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C))/(2.0*A); - float2 uv = (point - a*sinangle)/cosangle; - float r = FIX(R*acos(a)); - - return uv*r/sin(r/R); -} - -float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r 
= FIX(sqrt(dot(uv,uv))); - uv *= sin(r/R)/r; - float x = 1.0-cos(r/R); - float D = d/R + x*cosangle.x*cosangle.y+dot(uv,sinangle); - - return d*(uv*cosangle-x*sinangle)/D; -} - -float3 vs_maxscale(float2 sinangle, float2 cosangle) -{ - float2 c = vs_bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = float2(0.5,0.5)*aspect; - - float2 lo = float2(vs_fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - vs_fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect; - - float2 hi = float2(vs_fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - vs_fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y)); -} - -// Code snippet borrowed from crt-cyclon. (credits to DariusG) -float2 Warp(float2 pos) -{ - pos = pos*2.0 - 1.0; - pos *= float2(1.0 + pos.y*pos.y*0, 1.0 + pos.x*pos.x*0); - pos = pos*0.5 + 0.5; - - return pos; -} - - -// Vertex shader generating a triangle covering the entire screen -void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - // center screen - texcoord = Warp(texcoord - float2(centerx,centery)/100.0); - - float2 SourceSize = 1.0/NormalizedNativePixelSize; - float2 OutputSize = ViewportSize*BufferToViewportRatio; - - // Precalculate a bunch of useful values we'll need in the fragment - // shader. - vVARS.sinangle = sin(float2(x_tilt, y_tilt)); - vVARS.cosangle = cos(float2(x_tilt, y_tilt)); - vVARS.stretch = vs_maxscale(vVARS.sinangle, vVARS.cosangle); - - if(vertical_scanlines == false) - { - vVARS.TextureSize = float2(SHARPER * SourceSize.x, SourceSize.y); - - vVARS.ilfac = float2(1.0, clamp(floor(SourceSize.y/(interlace_detect > 0.5 ? 
200.0 : 1000)), 1.0, 2.0)); - - // The size of one texel, in texture-coordinates. - vVARS.one = vVARS.ilfac / vVARS.TextureSize; - - // Resulting X pixel-coordinate of the pixel we're drawing. - vVARS.mod_factor = texcoord.x * SourceSize.x * OutputSize.x / SourceSize.x; - }else{ - vVARS.TextureSize = float2(SourceSize.x, SHARPER * SourceSize.y); - - vVARS.ilfac = float2(clamp(floor(SourceSize.x/(interlace_detect > 0.5 ? 200.0 : 1000)), 1.0, 2.0), 1.0); - - // The size of one texel, in texture-coordinates. - vVARS.one = vVARS.ilfac / vVARS.TextureSize; - - // Resulting X pixel-coordinate of the pixel we're drawing. - vVARS.mod_factor = texcoord.y * SourceSize.y * OutputSize.y / SourceSize.y; - } -} - - - -float intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + d*d; - float B, C; - - if(vertical_scanlines == false) - { - B = 2.0*(R*(dot(xy,sinangle) - d*cosangle.x*cosangle.y) - d*d); - C = d*d + 2.0*R*d*cosangle.x*cosangle.y; - }else{ - B = 2.0*(R*(dot(xy,sinangle) - d*cosangle.y*cosangle.x) - d*d); - C = d*d + 2.0*R*d*cosangle.y*cosangle.x; - } - - return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A); -} - -float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = intersect(xy, sinangle, cosangle); - float2 point = (float2(c, c)*xy - float2(-R, -R)*sinangle) / float2(R, R); - float2 poc = point/cosangle; - float2 tang = sinangle/cosangle; - - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A); - float2 uv = (point - a*sinangle) / cosangle; - float r = FIX(R*acos(a)); - - return uv*r/sin(r/R); -} - -float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r = FIX(sqrt(dot(uv, uv))); - uv *= sin(r/R)/r; - float x = 1.0 - cos(r/R); - float D; - - if(vertical_scanlines == false) - D = d/R + x*cosangle.x*cosangle.y + dot(uv,sinangle); - else - D = d/R + x*cosangle.y*cosangle.x + dot(uv,sinangle); - - return d*(uv*cosangle 
- x*sinangle)/D; -} - -float3 maxscale(float2 sinangle, float2 cosangle) -{ - if(vertical_scanlines == false) - { - float2 c = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = float2(0.5, 0.5)*aspect; - - float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect; - float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y)); - }else{ - float2 c = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.y*cosangle.x), sinangle, cosangle); - float2 a = float2(0.5, 0.5)*aspect; - - float2 lo = float2(fwtrans(float2(-a.y, c.x), sinangle, cosangle).y, - fwtrans(float2( c.y, -a.x), sinangle, cosangle).x)/aspect; - float2 hi = float2(fwtrans(float2(+a.y, c.x), sinangle, cosangle).y, - fwtrans(float2( c.y, +a.x), sinangle, cosangle).x)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.y-lo.y, hi.x-lo.x)); - } -} - -// Calculate the influence of a scanline on the current pixel. -// -// 'distance' is the distance in texture coordinates from the current -// pixel to the scanline in question. -// 'color' is the colour of the scanline at the horizontal location of -// the current pixel. -float4 scanlineWeights(float distance, float4 color) -{ - // "wid" controls the width of the scanline beam, for each RGB - // channel The "weights" lines basically specify the formula - // that gives you the profile of the beam, i.e. the intensity as - // a function of distance from the vertical center of the - // scanline. In this case, it is gaussian if width=2, and - // becomes nongaussian for larger widths. Ideally this should - // be normalized so that the integral across the beam is - // independent of its width. That is, for a narrower beam - // "weights" should have a higher peak at the center of the - // scanline than for a wider beam. 
- #ifdef USEGAUSSIAN - float4 wid = 0.3 + 0.1 * pow(color, float4(3.0, 3.0, 3.0, 3.0)); - float dsw = distance / scanline_weight; - float4 weights = float4(dsw, dsw, dsw, dsw); - - return (lum + 0.4) * exp(-weights * weights) / wid; - #else - float4 wid = 2.0 + 2.0 * pow(color, float4(4.0, 4.0, 4.0, 4.0)); - float dsw = distance / scanline_weight; - float4 weights = float4(dsw, dsw, dsw, dsw); - - return (lum + 1.4) * exp(-pow(weights * rsqrt(0.5 * wid), wid)) / (0.6 + 0.2 * wid); - #endif -} - -float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch) -{ - coord = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy; - - return (bkwtrans(coord, sinangle, cosangle) / - float2(overscan_x / 100.0, overscan_y / 100.0)/aspect + float2(0.5, 0.5)); -} - -float corner(float2 coord) -{ - coord = min(coord, float2(1.0, 1.0) - coord) * aspect; - float2 cdist = float2(cornersize, cornersize); - coord = (cdist - min(coord, cdist)); - float dist = sqrt(dot(coord, coord)); - - if(vertical_scanlines == false) - return clamp((cdist.x - dist)*cornersmooth, 0.0, 1.0); - else - return clamp((cdist.y - dist)*cornersmooth, 0.0, 1.0); -} - -float fwidth(float value){ - return abs(ddx(value)) + abs(ddy(value)); -} - - - -float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target -{ - // Here's a helpful diagram to keep in mind while trying to - // understand the code: - // - // | | | | | - // ------------------------------- - // | | | | | - // | 01 | 11 | 21 | 31 | <-- current scanline - // | | @ | | | - // ------------------------------- - // | | | | | - // | 02 | 12 | 22 | 32 | <-- next scanline - // | | | | | - // ------------------------------- - // | | | | | - // - // Each character-cell represents a pixel on the output - // surface, "@" represents the current pixel (always somewhere - // in the bottom half of the current scan-line, or the top-half - // of the next scanline). 
The grid of lines represents the - // edges of the texels of the underlying texture. - - // Texture coordinates of the texel containing the active pixel. - float2 xy; - - if (CURVATURE == true) - xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch); - else - xy = vTexCoord; - - float cval = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5)); - - // Of all the pixels that are mapped onto the texel we are - // currently rendering, which pixel are we currently rendering? - float2 ilvec; - if(vertical_scanlines == false) - ilvec = float2(0.0, vVARS.ilfac.y * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0); - else - ilvec = float2(vVARS.ilfac.x * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0, 0.0); - - float2 ratio_scale = (xy * vVARS.TextureSize - float2(0.5, 0.5) + ilvec) / vVARS.ilfac; - float2 uv_ratio = frac(ratio_scale); - - // Snap to the center of the underlying texel. - xy = (floor(ratio_scale)*vVARS.ilfac + float2(0.5, 0.5) - ilvec) / vVARS.TextureSize; - - // Calculate Lanczos scaling coefficients describing the effect - // of various neighbour texels in a scanline on the current - // pixel. - float4 coeffs; - if(vertical_scanlines == false) - coeffs = PI * float4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x); - else - coeffs = PI * float4(1.0 + uv_ratio.y, uv_ratio.y, 1.0 - uv_ratio.y, 2.0 - uv_ratio.y); - - // Prevent division by zero. - coeffs = FIX(coeffs); - - // Lanczos2 kernel. - coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs); - - // Normalize. - coeffs /= dot(coeffs, float4(1.0, 1.0, 1.0, 1.0)); - - // Calculate the effective colour of the current and next - // scanlines at the horizontal location of the current pixel, - // using the Lanczos coefficients above. 
- float4 col, col2; - if(vertical_scanlines == false) - { - col = clamp( - mul(coeffs, float4x4( - TEX2D(xy + float2(-vVARS.one.x, 0.0)), - TEX2D(xy), - TEX2D(xy + float2(vVARS.one.x, 0.0)), - TEX2D(xy + float2(2.0 * vVARS.one.x, 0.0)) - )), - 0.0, 1.0 - ); - col2 = clamp( - mul(coeffs, float4x4( - TEX2D(xy + float2(-vVARS.one.x, vVARS.one.y)), - TEX2D(xy + float2(0.0, vVARS.one.y)), - TEX2D(xy + vVARS.one), - TEX2D(xy + float2(2.0 * vVARS.one.x, vVARS.one.y)) - )), - 0.0, 1.0 - ); - }else{ - col = clamp( - mul(coeffs, float4x4( - TEX2D(xy + float2(0.0, -vVARS.one.y)), - TEX2D(xy), - TEX2D(xy + float2(0.0, vVARS.one.y)), - TEX2D(xy + float2(0.0, 2.0 * vVARS.one.y)) - )), - 0.0, 1.0 - ); - col2 = clamp( - mul(coeffs, float4x4( - TEX2D(xy + float2(vVARS.one.x, -vVARS.one.y)), - TEX2D(xy + float2(vVARS.one.x, 0.0)), - TEX2D(xy + vVARS.one), - TEX2D(xy + float2(vVARS.one.x, 2.0 * vVARS.one.y)) - )), - 0.0, 1.0 - ); - } - -#ifndef LINEAR_PROCESSING - col = pow(col , float4(CRTgamma, CRTgamma, CRTgamma, CRTgamma)); - col2 = pow(col2, float4(CRTgamma, CRTgamma, CRTgamma, CRTgamma)); -#endif - - // Calculate the influence of the current and next scanlines on - // the current pixel. 
- float4 weights, weights2; - if(vertical_scanlines == false) - { - weights = scanlineWeights(uv_ratio.y, col); - weights2 = scanlineWeights(1.0 - uv_ratio.y, col2); - - #ifdef OVERSAMPLE - float filter = fwidth(ratio_scale.y); - uv_ratio.y = uv_ratio.y + 1.0/3.0*filter; - weights = (weights + scanlineWeights(uv_ratio.y, col))/3.0; - weights2 = (weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2))/3.0; - uv_ratio.y = uv_ratio.y - 2.0/3.0*filter; - weights = weights + scanlineWeights(abs(uv_ratio.y), col)/3.0; - weights2 = weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2)/3.0; - #endif - }else{ - weights = scanlineWeights(uv_ratio.x, col); - weights2 = scanlineWeights(1.0 - uv_ratio.x, col2); - - #ifdef OVERSAMPLE - float filter = fwidth(ratio_scale.x); - uv_ratio.x = uv_ratio.x + 1.0/3.0*filter; - weights = (weights + scanlineWeights(uv_ratio.x, col))/3.0; - weights2 = (weights2 + scanlineWeights(abs(1.0 - uv_ratio.x), col2))/3.0; - uv_ratio.x = uv_ratio.x - 2.0/3.0*filter; - weights = weights + scanlineWeights(abs(uv_ratio.x), col)/3.0; - weights2 = weights2 + scanlineWeights(abs(1.0 - uv_ratio.x), col2)/3.0; - #endif - } - - float3 mul_res = (col * weights + col2 * weights2).rgb; - mul_res *= float3(cval, cval, cval); - - // dot-mask emulation: - // Output pixels are alternately tinted green and magenta. - float3 dotMaskWeights = lerp( - float3(1.0, 1.0 - DOTMASK, 1.0), - float3(1.0 - DOTMASK, 1.0, 1.0 - DOTMASK), - floor((vVARS.mod_factor % 2.0)) - ); - - mul_res *= dotMaskWeights; - - // Convert the image gamma for display on our output device. 
- mul_res = pow(mul_res, float3(1.0 / monitorgamma, 1.0 / monitorgamma, 1.0 / monitorgamma)); - - return float4(mul_res, 1.0); -} - - -technique CRT_Geom -{ - pass - { - VertexShader = VS_CRT_Geom; - PixelShader = PS_CRT_Geom; - } -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale.fx b/data/resources/shaders/reshade/Shaders/crt/crt-royale.fx deleted file mode 100644 index 2b5e160b4..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale.fx +++ /dev/null @@ -1,521 +0,0 @@ -#include "ReShade.fxh" - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -// Ported to Duckstation (ReShade specs) by Hyllian (2024). 
- -// Set shader params for all passes here: - -uniform float crt_gamma < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.025; - ui_label = "Simulated CRT Gamma"; - ui_category = "Display Settings"; -> = 2.5; - -uniform float lcd_gamma < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 5.0; - ui_step = 0.025; - ui_label = "Your Display Gamma"; - ui_category = "Display Settings"; -> = 2.2; - -uniform float levels_contrast < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 4.0; - ui_step = 0.015625; - ui_label = "Contrast"; - ui_category = "Display Settings"; -> = 1.0; - -uniform float halation_weight < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Halation Weight"; - ui_category = "Effects"; -> = 0.0; - -uniform float diffusion_weight < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Diffusion Weight"; - ui_category = "Effects"; -> = 0.075; - -uniform float bloom_underestimate_levels < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 5.0; - ui_step = 0.01; - ui_label = "Bloom - Underestimate Levels"; - ui_category = "Effects"; -> = 0.8; - -uniform float bloom_excess < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Bloom - Excess"; - ui_category = "Effects"; -> = 0.0; - -uniform float beam_min_sigma < - ui_type = "drag"; - ui_min = 0.005; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Min Sigma"; - ui_category = "Beam Dynamics"; -> = 0.02; - -uniform float beam_max_sigma < - ui_type = "drag"; - ui_min = 0.005; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Max Sigma"; - ui_category = "Beam Dynamics"; -> = 0.3; - -uniform float beam_spot_power < - ui_type = "drag"; - ui_min = 0.01; - ui_max = 16.0; - ui_step = 0.01; - ui_label = "Spot Power"; - ui_category = "Beam Dynamics"; -> = 0.33; - -uniform float beam_min_shape < - ui_type = "drag"; - ui_min = 2.0; - ui_max = 32.0; - ui_step = 0.1; - ui_label = "Min Shape"; - ui_category = "Beam 
Dynamics"; -> = 2.0; - -uniform float beam_max_shape < - ui_type = "drag"; - ui_min = 2.0; - ui_max = 32.0; - ui_step = 0.1; - ui_label = "Max Shape"; - ui_category = "Beam Dynamics"; -> = 4.0; - -uniform float beam_shape_power < - ui_type = "drag"; - ui_min = 0.01; - ui_max = 16.0; - ui_step = 0.01; - ui_label = "Shape Power"; - ui_category = "Beam Dynamics"; -> = 0.25; - -uniform int beam_horiz_filter < - ui_type = "combo"; - ui_items = "Quilez\0Gaussian\0Lanczos\0"; - ui_label = "Horizontal Filter"; - ui_category = "Beam Dynamics"; -> = 0; - -uniform float beam_horiz_sigma < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 0.67; - ui_step = 0.005; - ui_label = "Horizontal Sigma"; - ui_category = "Beam Dynamics"; -> = 0.35; - -uniform float beam_horiz_linear_rgb_weight < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_label = "Horiz Linear RGB Weight"; - ui_category = "Beam Dynamics"; -> = 1.0; - -uniform float convergence_offset_x_r < - ui_type = "drag"; - ui_min = -4.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Offset X Red"; - ui_category = "Convergence"; -> = 0.0; - -uniform float convergence_offset_x_g < - ui_type = "drag"; - ui_min = -4.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Offset X Green"; - ui_category = "Convergence"; -> = 0.0; - -uniform float convergence_offset_x_b < - ui_type = "drag"; - ui_min = -4.0; - ui_max = 4.0; - ui_step = 0.05; - ui_label = "Offset X Blue"; - ui_category = "Convergence"; -> = 0.0; - -uniform float convergence_offset_y_r < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Offset Y Red"; - ui_category = "Convergence"; -> = 0.0; - -uniform float convergence_offset_y_g < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Offset Y Green"; - ui_category = "Convergence"; -> = 0.0; - -uniform float convergence_offset_y_b < - ui_type = "drag"; - ui_min = -2.0; - ui_max = 2.0; - ui_step = 0.05; - ui_label = "Offset Y Blue"; - 
ui_category = "Convergence"; -> = 0.0; - -uniform int mask_type < - ui_type = "combo"; - ui_items = "Aperture Grille\0Slot Mask\0Shadow Mask\0"; - ui_label = "Type"; - ui_category = "Mask"; -> = 0; - -uniform float mask_sample_mode_desired < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 2.0; - ui_step = 1.; - ui_label = "Sample Mode"; - ui_category = "Mask"; -> = 0.0; - -uniform float mask_specify_num_triads < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 1.0; - ui_label = "Specify Number of Triads"; - ui_category = "Mask"; -> = 0.0; - -uniform float mask_triad_size_desired < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 18.0; - ui_step = 0.125; - ui_label = "Triad Size Desired"; - ui_category = "Mask"; -> = 3.0; - -uniform float mask_num_triads_desired < - ui_type = "drag"; - ui_min = 342.0; - ui_max = 1920.0; - ui_step = 1.0; - ui_label = "Number of Triads Desired"; - ui_category = "Mask"; -> = 480.0; - -uniform bool interlace_detect < - ui_type = "radio"; - ui_label = "Enable Interlacing Detection"; - ui_category = "Interlacing"; -> = true; - -uniform bool interlace_bff < - ui_type = "radio"; - ui_label = "Bottom Field First"; - ui_category = "Interlacing"; -> = false; - -uniform bool interlace_1080i < - ui_type = "radio"; - ui_label = "Detect 1080i"; - ui_category = "Interlacing"; -> = false; - - -uniform float FrameCount < source = "framecount"; >; -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 InternalPixelSize < source = "internal_pixel_size"; >; -uniform float2 NativePixelSize < source = "native_pixel_size"; >; -uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >; -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float UpscaleMultiplier < source = "upscale_multiplier"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float 
ViewportHeight < source = "viewportheight"; >; - -#include "../misc/include/geom.fxh" - -#define VIEWPORT_SIZE (ViewportSize*BufferToViewportRatio) -#define TEXTURE_SIZE (1.0/NormalizedNativePixelSize) - -#define ORIG_LINEARIZED_texture_size TEXTURE_SIZE -#define VERTICAL_SCANLINES_texture_size TEXTURE_SIZE -#define BLOOM_APPROX_texture_size TEXTURE_SIZE -#define BLUR9FAST_VERTICAL_texture_size TEXTURE_SIZE -#define HALATION_BLUR_texture_size TEXTURE_SIZE -#define MASK_RESIZE_VERT_texture_size TEXTURE_SIZE -#define MASK_RESIZE_texture_size float2(64.0,0.0625*((VIEWPORT_SIZE).y)) -#define MASKED_SCANLINES_texture_size (0.0625*VIEWPORT_SIZE) -#define BRIGHTPASS_texture_size VIEWPORT_SIZE -#define BLOOM_VERTICAL_texture_size VIEWPORT_SIZE -#define BLOOM_HORIZONTAL_texture_size VIEWPORT_SIZE - -#define ORIG_LINEARIZED_video_size ORIG_LINEARIZED_texture_size -#define VERTICAL_SCANLINES_video_size VERTICAL_SCANLINES_texture_size -#define BLOOM_APPROX_video_size BLOOM_APPROX_texture_size -#define BLUR9FAST_VERTICAL_video_size BLUR9FAST_VERTICAL_texture_size -#define HALATION_BLUR_video_size HALATION_BLUR_texture_size -#define MASK_RESIZE_VERT_video_size MASK_RESIZE_VERT_texture_size -#define MASK_RESIZE_video_size MASK_RESIZE_texture_size -#define MASKED_SCANLINES_video_size MASKED_SCANLINES_texture_size -#define BRIGHTPASS_video_size BRIGHTPASS_texture_size -#define BLOOM_VERTICAL_video_size BLOOM_VERTICAL_texture_size -#define BLOOM_HORIZONTAL_video_size BLOOM_HORIZONTAL_texture_size - -#define video_size texture_size - - -texture2D tmask_grille_texture_small < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;}; -texture2D tmask_slot_texture_small < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;}; -texture2D tmask_shadow_texture_small < source = "crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"; 
> {Width=64.0;Height=64.0;MipLevels=0;}; - -texture2D tmask_grille_texture_large < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;}; -texture2D tmask_slot_texture_large < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;}; -texture2D tmask_shadow_texture_large < source = "crt-royale/TileableLinearShadowMaskEDP.png"; > {Width=512.0;Height=512.0;MipLevels=4;}; - -sampler2D mask_grille_texture_small { Texture = tmask_grille_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; -sampler2D mask_slot_texture_small { Texture = tmask_slot_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; -sampler2D mask_shadow_texture_small { Texture = tmask_shadow_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; - -sampler2D mask_grille_texture_large { Texture = tmask_grille_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; -sampler2D mask_slot_texture_large { Texture = tmask_slot_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; -sampler2D mask_shadow_texture_large { Texture = tmask_shadow_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;}; - - -#ifndef DEBUG_PASSES - #define DEBUG_PASSES 11 -#endif - - - -texture2D tORIG_LINEARIZED{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D ORIG_LINEARIZED{Texture=tORIG_LINEARIZED;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; - -#if (DEBUG_PASSES > 1) -texture2D tVERTICAL_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D VERTICAL_SCANLINES{Texture=tVERTICAL_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif 
-#if (DEBUG_PASSES > 2) -texture2D tBLOOM_APPROX{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D BLOOM_APPROX{Texture=tBLOOM_APPROX;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif - -#if (DEBUG_PASSES > 3) -// Need checking if it's really necessary to rendertarget. -texture2D tBLUR9FAST_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D BLUR9FAST_VERTICAL{Texture=tBLUR9FAST_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif -#if (DEBUG_PASSES > 4) - -texture2D tHALATION_BLUR{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D HALATION_BLUR{Texture=tHALATION_BLUR;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif -#if (DEBUG_PASSES > 5) - -texture2D tMASK_RESIZE_VERTICAL{Width=64.0;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;}; -sampler2D MASK_RESIZE_VERTICAL{Texture=tMASK_RESIZE_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;}; -#endif -#if (DEBUG_PASSES > 6) - -texture2D tMASK_RESIZE{Width=BUFFER_WIDTH*0.0625;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;}; -sampler2D MASK_RESIZE{Texture=tMASK_RESIZE;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;}; -#endif -#if (DEBUG_PASSES > 7) - -texture2D tMASKED_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D MASKED_SCANLINES{Texture=tMASKED_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif -#if (DEBUG_PASSES > 8) - -texture2D tBRIGHTPASS{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D BRIGHTPASS{Texture=tBRIGHTPASS;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif - -#if (DEBUG_PASSES > 9) -texture2D tBLOOM_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;}; -sampler2D 
BLOOM_VERTICAL{Texture=tBLOOM_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; -#endif - - - -#include "crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh" - -#if (DEBUG_PASSES > 1) -#include "crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh" -#endif -#if (DEBUG_PASSES > 2) -#include "crt-royale/src/crt-royale-bloom-approx.fxh" -#endif -#if (DEBUG_PASSES > 3) -#include "crt-royale/src/blur9fast-vertical.fxh" -#endif -#if (DEBUG_PASSES > 4) -#include "crt-royale/src/blur9fast-horizontal.fxh" -#endif -#if (DEBUG_PASSES > 5) -#include "crt-royale/src/crt-royale-mask-resize-vertical.fxh" -#endif -#if (DEBUG_PASSES > 6) -#include "crt-royale/src/crt-royale-mask-resize-horizontal.fxh" -#endif -#if (DEBUG_PASSES > 7) -#include "crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh" -#endif -#if (DEBUG_PASSES > 8) -#include "crt-royale/src/crt-royale-brightpass.fxh" -#endif -#if (DEBUG_PASSES > 9) -#include "crt-royale/src/crt-royale-bloom-vertical.fxh" -#endif -#if (DEBUG_PASSES > 10) -#include "crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh" -#endif - - -technique CRT_Royale -{ - pass - { - VertexShader = VS_Linearize; - PixelShader = PS_Linearize; - RenderTarget = tORIG_LINEARIZED; - } -#if (DEBUG_PASSES > 1) - pass - { - VertexShader = VS_Scanlines_Vertical_Interlacing; - PixelShader = PS_Scanlines_Vertical_Interlacing; - RenderTarget = tVERTICAL_SCANLINES; - } -#endif -#if (DEBUG_PASSES > 2) - pass - { - VertexShader = VS_Bloom_Approx; - PixelShader = PS_Bloom_Approx; - RenderTarget = tBLOOM_APPROX; - } -#endif -#if (DEBUG_PASSES > 3) - pass - { - VertexShader = VS_Blur9Fast_Vertical; - PixelShader = PS_Blur9Fast_Vertical; - RenderTarget = tBLUR9FAST_VERTICAL; - } -#endif -#if (DEBUG_PASSES > 4) - pass - { - VertexShader = VS_Blur9Fast_Horizontal; - PixelShader = PS_Blur9Fast_Horizontal; - RenderTarget = tHALATION_BLUR; - } -#endif -#if (DEBUG_PASSES > 5) - pass - { - 
VertexShader = VS_Mask_Resize_Vertical; - PixelShader = PS_Mask_Resize_Vertical; - RenderTarget = tMASK_RESIZE_VERTICAL; - } -#endif -#if (DEBUG_PASSES > 6) - pass - { - VertexShader = VS_Mask_Resize_Horizontal; - PixelShader = PS_Mask_Resize_Horizontal; - RenderTarget = tMASK_RESIZE; - } -#endif -#if (DEBUG_PASSES > 7) - pass - { - VertexShader = VS_Scanlines_Horizontal_Apply_Mask; - PixelShader = PS_Scanlines_Horizontal_Apply_Mask; - RenderTarget = tMASKED_SCANLINES; - } -#endif -#if (DEBUG_PASSES > 8) - pass - { - VertexShader = VS_Brightpass; - PixelShader = PS_Brightpass; - RenderTarget = tBRIGHTPASS; - } -#endif -#if (DEBUG_PASSES > 9) - pass - { - VertexShader = VS_Bloom_Vertical; - PixelShader = PS_Bloom_Vertical; - RenderTarget = tBLOOM_VERTICAL; - } -#endif -#if (DEBUG_PASSES > 10) - pass - { - VertexShader = VS_Bloom_Horizontal; - PixelShader = PS_Bloom_Horizontal; - } -#endif -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/LICENSE.TXT b/data/resources/shaders/reshade/Shaders/crt/crt-royale/LICENSE.TXT deleted file mode 100644 index d8cf7d463..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/LICENSE.TXT +++ /dev/null @@ -1,280 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. 
(Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. 
To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. 
You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. 
- -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bind-shader-params.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bind-shader-params.fxh deleted file mode 100644 index 1a542bc95..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bind-shader-params.fxh +++ /dev/null @@ -1,249 +0,0 @@ -#ifndef BIND_SHADER_PARAMS_H -#define BIND_SHADER_PARAMS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. 
-// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "helper-functions-and-macros.fxh" -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" - -// Override some parameters for gamma-management.h and tex2Dantialias.h: -#define OVERRIDE_DEVICE_GAMMA -static const float gba_gamma = 3.5; // Irrelevant but necessary to define. -#define ANTIALIAS_OVERRIDE_BASICS -#define ANTIALIAS_OVERRIDE_PARAMETERS - -// Disable runtime shader params if the user doesn't explicitly want them. -// Static constants will be defined in place of uniforms of the same name. -#ifndef RUNTIME_SHADER_PARAMS_ENABLE - #undef PARAMETER_UNIFORM -#endif - -// Bind option names to shader parameter uniforms or static constants. 
-#ifdef PARAMETER_UNIFORM - uniform float crt_gamma; - uniform float lcd_gamma; - uniform float levels_contrast; - uniform float halation_weight; - uniform float diffusion_weight; - uniform float bloom_underestimate_levels; - uniform float bloom_excess; - uniform float beam_min_sigma; - uniform float beam_max_sigma; - uniform float beam_spot_power; - uniform float beam_min_shape; - uniform float beam_max_shape; - uniform float beam_shape_power; - uniform float beam_horiz_sigma; - #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - uniform float beam_horiz_filter; - uniform float beam_horiz_linear_rgb_weight; - #else - static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0); - static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0); - #endif - uniform float convergence_offset_x_r; - uniform float convergence_offset_x_g; - uniform float convergence_offset_x_b; - uniform float convergence_offset_y_r; - uniform float convergence_offset_y_g; - uniform float convergence_offset_y_b; - #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - uniform float mask_type; - #else - static const float mask_type = clamp(mask_type_static, 0.0, 2.0); - #endif - uniform float mask_sample_mode_desired; - uniform float mask_specify_num_triads; - uniform float mask_triad_size_desired; - uniform float mask_num_triads_desired; - uniform float aa_subpixel_r_offset_x_runtime; - uniform float aa_subpixel_r_offset_y_runtime; - #ifdef RUNTIME_ANTIALIAS_WEIGHTS - uniform float aa_cubic_c; - uniform float aa_gauss_sigma; - #else - static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]? - static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]? 
- #endif - uniform float geom_mode_runtime; - uniform float geom_radius; - uniform float geom_view_dist; - uniform float geom_tilt_angle_x; - uniform float geom_tilt_angle_y; - uniform float geom_aspect_ratio_x; - uniform float geom_aspect_ratio_y; - uniform float geom_overscan_x; - uniform float geom_overscan_y; - uniform float border_size; - uniform float border_darkness; - uniform float border_compress; - uniform float interlace_bff; - uniform float interlace_1080i; -#else - // Use constants from user-settings.h, and limit ranges appropriately: -/* static const float crt_gamma = macro_max(0.0, crt_gamma_static); - static const float lcd_gamma = macro_max(0.0, lcd_gamma_static); - static const float levels_contrast = macro_clamp(levels_contrast_static, 0.0, 4.0); - static const float halation_weight = macro_clamp(halation_weight_static, 0.0, 1.0); - static const float diffusion_weight = macro_clamp(diffusion_weight_static, 0.0, 1.0); - static const float bloom_underestimate_levels = macro_max(FIX_ZERO(0.0), bloom_underestimate_levels_static); - static const float bloom_excess = macro_clamp(bloom_excess_static, 0.0, 1.0); - static const float beam_min_sigma = macro_max(FIX_ZERO(0.0), beam_min_sigma_static); - static const float beam_max_sigma = macro_max(beam_min_sigma, beam_max_sigma_static); - static const float beam_spot_power = macro_max(beam_spot_power_static, 0.0); - static const float beam_min_shape = macro_max(2.0, beam_min_shape_static); - static const float beam_max_shape = macro_max(beam_min_shape, beam_max_shape_static); - static const float beam_shape_power = macro_max(0.0, beam_shape_power_static); - static const float beam_horiz_filter = macro_clamp(beam_horiz_filter_static, 0.0, 2.0); - static const float beam_horiz_sigma = macro_max(FIX_ZERO(0.0), beam_horiz_sigma_static); - static const float beam_horiz_linear_rgb_weight = macro_clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0); -*/ // Unpack static vector elements to match scalar uniforms: 
-/* static const float convergence_offset_x_r = macro_clamp(convergence_offsets_r_static.x, -4.0, 4.0); - static const float convergence_offset_x_g = macro_clamp(convergence_offsets_g_static.x, -4.0, 4.0); - static const float convergence_offset_x_b = macro_clamp(convergence_offsets_b_static.x, -4.0, 4.0); - static const float convergence_offset_y_r = macro_clamp(convergence_offsets_r_static.y, -4.0, 4.0); - static const float convergence_offset_y_g = macro_clamp(convergence_offsets_g_static.y, -4.0, 4.0); - static const float convergence_offset_y_b = macro_clamp(convergence_offsets_b_static.y, -4.0, 4.0); - static const float mask_type = macro_clamp(mask_type_static, 0.0, 2.0); - static const float mask_sample_mode_desired = macro_clamp(mask_sample_mode_static, 0.0, 2.0); - static const float mask_specify_num_triads = macro_clamp(mask_specify_num_triads_static, 0.0, 1.0); - static const float mask_triad_size_desired = macro_clamp(mask_triad_size_desired_static, 1.0, 18.0); - static const float mask_num_triads_desired = macro_clamp(mask_num_triads_desired_static, 342.0, 1920.0); - static const float aa_subpixel_r_offset_x_runtime = macro_clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5); - static const float aa_subpixel_r_offset_y_runtime = macro_clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5); - static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]? - static const float aa_gauss_sigma = macro_max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]? - static const float geom_mode_runtime = macro_clamp(geom_mode_static, 0.0, 3.0); - static const float geom_radius = macro_max(1.0/(2.0*pi), geom_radius_static); // Clamp to [1/(2*pi), 1024]? - static const float geom_view_dist = macro_max(0.5, geom_view_dist_static); // Clamp to [0.5, 1024]? 
- static const float geom_tilt_angle_x = macro_clamp(geom_tilt_angle_static.x, -pi, pi); - static const float geom_tilt_angle_y = macro_clamp(geom_tilt_angle_static.y, -pi, pi); - static const float geom_aspect_ratio_x = geom_aspect_ratio_static; // Force >= 1? - static const float geom_aspect_ratio_y = 1.0; - static const float geom_overscan_x = macro_max(FIX_ZERO(0.0), geom_overscan_static.x); - static const float geom_overscan_y = macro_max(FIX_ZERO(0.0), geom_overscan_static.y); - static const float border_size = macro_clamp(border_size_static, 0.0, 0.5); // 0.5 reaches to image center - static const float border_darkness = macro_max(0.0, border_darkness_static); - static const float border_compress = macro_max(1.0, border_compress_static); // < 1.0 darkens whole image - static const float interlace_bff = float(interlace_bff_static); - static const float interlace_1080i = float(interlace_1080i_static); -*/ -#endif - -/* -// Provide accessors for vector constants that pack scalar uniforms: -float2 get_aspect_vector(const float geom_aspect_ratio) -{ - // Get an aspect ratio vector. 
Enforce geom_max_aspect_ratio, and prevent - // the absolute scale from affecting the uv-mapping for curvature: - const float geom_clamped_aspect_ratio = - min(geom_aspect_ratio, geom_max_aspect_ratio); - const float2 geom_aspect = - normalize(float2(geom_clamped_aspect_ratio, 1.0)); - return geom_aspect; -} - -float2 get_geom_overscan_vector() -{ - return float2(geom_overscan_x, geom_overscan_y); -} - -float2 get_geom_tilt_angle_vector() -{ - return float2(geom_tilt_angle_x, geom_tilt_angle_y); -} -*/ -float3 get_convergence_offsets_x_vector() -{ - return float3(convergence_offset_x_r, convergence_offset_x_g, - convergence_offset_x_b); -} - -float3 get_convergence_offsets_y_vector() -{ - return float3(convergence_offset_y_r, convergence_offset_y_g, - convergence_offset_y_b); -} - -float2 get_convergence_offsets_r_vector() -{ - return float2(convergence_offset_x_r, convergence_offset_y_r); -} - -float2 get_convergence_offsets_g_vector() -{ - return float2(convergence_offset_x_g, convergence_offset_y_g); -} - -float2 get_convergence_offsets_b_vector() -{ - return float2(convergence_offset_x_b, convergence_offset_y_b); -} -/* -float2 get_aa_subpixel_r_offset() -{ - #ifdef RUNTIME_ANTIALIAS_WEIGHTS - #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - // WARNING: THIS IS EXTREMELY EXPENSIVE. - return float2(aa_subpixel_r_offset_x_runtime, - aa_subpixel_r_offset_y_runtime); - #else - return aa_subpixel_r_offset_static; - #endif - #else - return aa_subpixel_r_offset_static; - #endif -} -*/ -// Provide accessors settings which still need "cooking:" -float get_mask_amplify() -{ - static const float mask_grille_amplify = 1.0/mask_grille_avg_color; - static const float mask_slot_amplify = 1.0/mask_slot_avg_color; - static const float mask_shadow_amplify = 1.0/mask_shadow_avg_color; - return mask_type < 0.5 ? mask_grille_amplify : - mask_type < 1.5 ? 
mask_slot_amplify : - mask_shadow_amplify; -} - -float get_mask_sample_mode() -{ - #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - return mask_sample_mode_desired; - #else - return clamp(mask_sample_mode_desired, 1.0, 2.0); - #endif - #else - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - return mask_sample_mode_static; - #else - return clamp(mask_sample_mode_static, 1.0, 2.0); - #endif - #endif -} - - -#endif // BIND_SHADER_PARAMS_H - - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bloom-functions.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bloom-functions.fxh deleted file mode 100644 index 54a6a82c7..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/bloom-functions.fxh +++ /dev/null @@ -1,317 +0,0 @@ -#ifndef BLOOM_FUNCTIONS_H -#define BLOOM_FUNCTIONS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// These utility functions and constants help several passes determine the -// size and center texel weight of the phosphor bloom in a uniform manner. 
- - -////////////////////////////////// INCLUDES ////////////////////////////////// - -// We need to calculate the correct blur sigma using some .cgp constants: -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" -#include "blur-functions.fxh" - - -/////////////////////////////// BLOOM CONSTANTS ////////////////////////////// - -// Compute constants with manual inlines of the functions below: -static const float bloom_diff_thresh = 1.0/256.0; - - - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float get_min_sigma_to_blur_triad(const float triad_size, - const float thresh) -{ - // Requires: 1.) triad_size is the final phosphor triad size in pixels - // 2.) thresh is the max desired pixel difference in the - // blurred triad (e.g. 1.0/256.0). - // Returns: Return the minimum sigma that will fully blur a phosphor - // triad on the screen to an even color, within thresh. - // This closed-form function was found by curve-fitting data. - // Estimate: max error = ~0.086036, mean sq. error = ~0.0013387: - return -0.05168 + 0.6113*triad_size - - 1.122*triad_size*sqrt(0.000416 + thresh); - // Estimate: max error = ~0.16486, mean sq. error = ~0.0041041: - //return 0.5985*triad_size - triad_size*sqrt(thresh) -} - -float get_absolute_scale_blur_sigma(const float thresh) -{ - // Requires: 1.) min_expected_triads must be a global float. The number - // of horizontal phosphor triads in the final image must be - // >= min_allowed_viewport_triads.x for realistic results. - // 2.) bloom_approx_scale_x must be a global float equal to the - // absolute horizontal scale of BLOOM_APPROX. - // 3.) bloom_approx_scale_x/min_allowed_viewport_triads.x - // should be <= 1.1658025090 to keep the final result < - // 0.62666015625 (the largest sigma ensuring the largest - // unused texel weight stays < 1.0/256.0 for a 3x3 blur). - // 4.) thresh is the max desired pixel difference in the - // blurred triad (e.g. 1.0/256.0). 
- // Returns: Return the minimum Gaussian sigma that will blur the pass - // output as much as it would have taken to blur away - // bloom_approx_scale_x horizontal phosphor triads. - // Description: - // BLOOM_APPROX should look like a downscaled phosphor blur. Ideally, we'd - // use the same blur sigma as the actual phosphor bloom and scale it down - // to the current resolution with (bloom_approx_scale_x/viewport_size_x), but - // we don't know the viewport size in this pass. Instead, we'll blur as - // much as it would take to blur away min_allowed_viewport_triads.x. This - // will blur "more than necessary" if the user actually uses more triads, - // but that's not terrible either, because blurring a constant fraction of - // the viewport may better resemble a true optical bloom anyway (since the - // viewport will generally be about the same fraction of each player's - // field of view, regardless of screen size and resolution). - // Assume an extremely large viewport size for asymptotic results. - return bloom_approx_scale_x/max_viewport_size_x * - get_min_sigma_to_blur_triad( - max_viewport_size_x/min_allowed_viewport_triads.x, thresh); -} - -float get_center_weight(const float sigma) -{ - // Given a Gaussian blur sigma, get the blur weight for the center texel. 
- #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA - return get_fast_gaussian_weight_sum_inv(sigma); - #else - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - const float w16 = exp(-256.0 * denom_inv); - const float w17 = exp(-289.0 * denom_inv); - const float w18 = exp(-324.0 * denom_inv); - const float w19 = exp(-361.0 * denom_inv); - const float w20 = exp(-400.0 * denom_inv); - const float w21 = exp(-441.0 * denom_inv); - // Note: If the implementation uses a smaller blur than the max allowed, - // the worst case scenario is that the center weight will be overestimated, - // so we'll put a bit more energy into the brightpass...no huge deal. 
- // Then again, if the implementation uses a larger blur than the max - // "allowed" because of dynamic branching, the center weight could be - // underestimated, which is more of a problem...consider always using - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - // 43x blur: - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + - w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21)); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - // 31x blur: - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + - w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15)); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - // 25x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12)); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - // 17x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8)); - #else - // 9x blur: - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - const float center_weight = weight_sum_inv * weight_sum_inv; - return center_weight; - #endif -} - -float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // If sigma is static, we can safely branch and use the smallest blur - // that's big enough. Ignore #define hints, because we'll only use a - // large blur if we actually need it, and the branches cost nothing. 
- #ifndef RUNTIME_PHOSPHOR_BLOOM_SIGMA - #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - #else - // It's still worth branching if the profile supports dynamic branches: - // It's much faster than using a hugely excessive blur, but each branch - // eats ~1% FPS. - #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - #endif - #endif - // Failed optimization notes: - // I originally created a same-size mipmapped 5-tap separable blur10 that - // could handle any sigma by reaching into lower mip levels. It was - // as fast as blur25fast for runtime sigmas and a tad faster than - // blur31fast for static sigmas, but mipmapping two viewport-size passes - // ate 10% of FPS across all codepaths, so it wasn't worth it. - #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE - if(sigma <= blur9_std_dev) - { - return tex2Dblur9fast(tex, tex_uv, dxdy, sigma); - } - else if(sigma <= blur17_std_dev) - { - return tex2Dblur17fast(tex, tex_uv, dxdy, sigma); - } - else if(sigma <= blur25_std_dev) - { - return tex2Dblur25fast(tex, tex_uv, dxdy, sigma); - } - else if(sigma <= blur31_std_dev) - { - return tex2Dblur31fast(tex, tex_uv, dxdy, sigma); - } - else - { - return tex2Dblur43fast(tex, tex_uv, dxdy, sigma); - } - #else - // If we can't afford to branch, we can only guess at what blur - // size we need. Therefore, use the largest blur allowed. 
- #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - return tex2Dblur43fast(tex, tex_uv, dxdy, sigma); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - return tex2Dblur31fast(tex, tex_uv, dxdy, sigma); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - return tex2Dblur25fast(tex, tex_uv, dxdy, sigma); - #else - #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - return tex2Dblur17fast(tex, tex_uv, dxdy, sigma); - #else - return tex2Dblur9fast(tex, tex_uv, dxdy, sigma); - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - #endif // PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - #endif // PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE -} - -float get_bloom_approx_sigma(const float output_size_x_runtime, - const float estimated_viewport_size_x) -{ - // Requires: 1.) output_size_x_runtime == BLOOM_APPROX.output_size.x. - // This is included for dynamic codepaths just in case the - // following two globals are incorrect: - // 2.) bloom_approx_size_x_for_skip should == the same - // if PHOSPHOR_BLOOM_FAKE is #defined - // 3.) bloom_approx_size_x should == the same otherwise - // Returns: For gaussian4x4, return a dynamic small bloom sigma that's - // as close to optimal as possible given available information. - // For blur3x3, return the a static small bloom sigma that - // works well for typical cases. Otherwise, we're using simple - // bilinear filtering, so use static calculations. - // Assume the default static value. This is a compromise that ensures - // typical triads are blurred, even if unusually large ones aren't. 
- static const float mask_num_triads_static = - max(min_allowed_viewport_triads.x, mask_num_triads_desired_static); - const float mask_num_triads_from_size = - estimated_viewport_size_x/mask_triad_size_desired; - const float mask_num_triads_runtime = max(min_allowed_viewport_triads.x, - lerp(mask_num_triads_from_size, mask_num_triads_desired, - mask_specify_num_triads)); - // Assume an extremely large viewport size for asymptotic results: - static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0); - if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize - { - // Use the runtime num triads and output size: - const float asymptotic_triad_size = - max_viewport_size_x/mask_num_triads_runtime; - const float asymptotic_sigma = get_min_sigma_to_blur_triad( - asymptotic_triad_size, bloom_diff_thresh); - const float bloom_approx_sigma = - asymptotic_sigma * output_size_x_runtime/max_viewport_size_x; - // The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but - // account for the Gaussian scanline sigma from the last pass too. - // The bloom will be too wide horizontally but tall enough vertically. - return length(float2(bloom_approx_sigma, beam_max_sigma)); - } - else // 3x3 blur resize (the bilinear resize doesn't need a sigma) - { - // We're either using blur3x3 or bilinear filtering. The biggest - // reason to choose blur3x3 is to avoid dynamic weights, so use a - // static calculation. 
- #ifdef PHOSPHOR_BLOOM_FAKE - static const float output_size_x_static = - bloom_approx_size_x_for_fake; - #else - static const float output_size_x_static = bloom_approx_size_x; - #endif - static const float asymptotic_triad_size = - max_viewport_size_x/mask_num_triads_static; - const float asymptotic_sigma = get_min_sigma_to_blur_triad( - asymptotic_triad_size, bloom_diff_thresh); - const float bloom_approx_sigma = - asymptotic_sigma * output_size_x_static/max_viewport_size_x; - // The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but - // try accounting for the Gaussian scanline sigma from the last pass - // too; use the static default value: - return length(float2(bloom_approx_sigma, beam_max_sigma_static)); - } -} - -float get_final_bloom_sigma(const float bloom_sigma_runtime) -{ - // Requires: 1.) bloom_sigma_runtime is a precalculated sigma that's - // optimal for the [known] triad size. - // 2.) Call this from a fragment shader (not a vertex shader), - // or blurring with static sigmas won't be constant-folded. - // Returns: Return the optimistic static sigma if the triad size is - // known at compile time. Otherwise return the optimal runtime - // sigma (10% slower) or an implementation-specific compromise - // between an optimistic or pessimistic static sigma. - // Notes: Call this from the fragment shader, NOT the vertex shader, - // so static sigmas can be constant-folded! 
- const float bloom_sigma_optimistic = get_min_sigma_to_blur_triad( - mask_triad_size_desired_static, bloom_diff_thresh); - #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA - return bloom_sigma_runtime; - #else - // Overblurring looks as bad as underblurring, so assume average-size - // triads, not worst-case huge triads: - return bloom_sigma_optimistic; - #endif -} - - -#endif // BLOOM_FUNCTIONS_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/blur-functions.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/blur-functions.fxh deleted file mode 100644 index b1d52c478..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/blur-functions.fxh +++ /dev/null @@ -1,1916 +0,0 @@ -#ifndef BLUR_FUNCTIONS_H -#define BLUR_FUNCTIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file provides reusable one-pass and separable (two-pass) blurs. -// Requires: All blurs share these requirements (dxdy requirement is split): -// 1.) All requirements of gamma-management.h must be satisfied! -// 2.) filter_linearN must == "true" in your .cgp preset unless -// you're using tex2DblurNresize at 1x scale. -// 3.) mipmap_inputN must == "true" in your .cgp preset if -// IN.output_size < IN.video_size. -// 4.) IN.output_size == IN.video_size / pow(2, M), where M is some -// positive integer. tex2Dblur*resize can resize arbitrarily -// (and the blur will be done after resizing), but arbitrary -// resizes "fail" with other blurs due to the way they mix -// static weights with bilinear sample exploitation. -// 5.) In general, dxdy should contain the uv pixel spacing: -// dxdy = (IN.video_size/IN.output_size)/IN.texture_size -// 6.) For separable blurs (tex2DblurNresize and tex2DblurNfast), -// zero out the dxdy component in the unblurred dimension: -// dxdy = float2(dxdy.x, 0.0) or float2(0.0, dxdy.y) -// Many blurs share these requirements: -// 1.) One-pass blurs require scale_xN == scale_yN or scales > 1.0, -// or they will blur more in the lower-scaled dimension. -// 2.) One-pass shared sample blurs require ddx(), ddy(), and -// tex2Dlod() to be supported by the current Cg profile, and -// the drivers must support high-quality derivatives. -// 3.) One-pass shared sample blurs require: -// tex_uv.w == log2(IN.video_size/IN.output_size).y; -// Non-wrapper blurs share this requirement: -// 1.) 
sigma is the intended standard deviation of the blur -// Wrapper blurs share this requirement, which is automatically -// met (unless OVERRIDE_BLUR_STD_DEVS is #defined; see below): -// 1.) blurN_std_dev must be global static const float values -// specifying standard deviations for Nx blurs in units -// of destination pixels -// Optional: 1.) The including file (or an earlier included file) may -// optionally #define USE_BINOMIAL_BLUR_STD_DEVS to replace -// default standard deviations with those matching a binomial -// distribution. (See below for details/properties.) -// 2.) The including file (or an earlier included file) may -// optionally #define OVERRIDE_BLUR_STD_DEVS and override: -// static const float blur3_std_dev -// static const float blur4_std_dev -// static const float blur5_std_dev -// static const float blur6_std_dev -// static const float blur7_std_dev -// static const float blur8_std_dev -// static const float blur9_std_dev -// static const float blur10_std_dev -// static const float blur11_std_dev -// static const float blur12_std_dev -// static const float blur17_std_dev -// static const float blur25_std_dev -// static const float blur31_std_dev -// static const float blur43_std_dev -// 3.) The including file (or an earlier included file) may -// optionally #define OVERRIDE_ERROR_BLURRING and override: -// static const float error_blurring -// This tuning value helps mitigate weighting errors from one- -// pass shared-sample blurs sharing bilinear samples between -// fragments. Values closer to 0.0 have "correct" blurriness -// but allow more artifacts, and values closer to 1.0 blur away -// artifacts by sampling closer to halfway between texels. -// UPDATE 6/21/14: The above static constants may now be overridden -// by non-static uniform constants. This permits exposing blur -// standard deviations as runtime GUI shader parameters. 
However, -// using them keeps weights from being statically computed, and the -// speed hit depends on the blur: On my machine, uniforms kill over -// 53% of the framerate with tex2Dblur12x12shared, but they only -// drop the framerate by about 18% with tex2Dblur11fast. -// Quality and Performance Comparisons: -// For the purposes of the following discussion, "no sRGB" means -// GAMMA_ENCODE_EVERY_FBO is #defined, and "sRGB" means it isn't. -// 1.) tex2DblurNfast is always faster than tex2DblurNresize. -// 2.) tex2DblurNresize functions are the only ones that can arbitrarily resize -// well, because they're the only ones that don't exploit bilinear samples. -// This also means they're the only functions which can be truly gamma- -// correct without linear (or sRGB FBO) input, but only at 1x scale. -// 3.) One-pass shared sample blurs only have a speed advantage without sRGB. -// They also have some inaccuracies due to their shared-[bilinear-]sample -// design, which grow increasingly bothersome for smaller blurs and higher- -// frequency source images (relative to their resolution). I had high -// hopes for them, but their most realistic use case is limited to quickly -// reblurring an already blurred input at full resolution. Otherwise: -// a.) If you're blurring a low-resolution source, you want a better blur. -// b.) If you're blurring a lower mipmap, you want a better blur. -// c.) If you're blurring a high-resolution, high-frequency source, you -// want a better blur. -// 4.) The one-pass blurs without shared samples grow slower for larger blurs, -// but they're competitive with separable blurs at 5x5 and smaller, and -// even tex2Dblur7x7 isn't bad if you're wanting to conserve passes. -// Here are some framerates from a GeForce 8800GTS. The first pass resizes to -// viewport size (4x in this test) and linearizes for sRGB codepaths, and the -// remaining passes perform 6 full blurs. 
Mipmapped tests are performed at the -// same scale, so they just measure the cost of mipmapping each FBO (only every -// other FBO is mipmapped for separable blurs, to mimic realistic usage). -// Mipmap Neither sRGB+Mipmap sRGB Function -// 76.0 92.3 131.3 193.7 tex2Dblur3fast -// 63.2 74.4 122.4 175.5 tex2Dblur3resize -// 93.7 121.2 159.3 263.2 tex2Dblur3x3 -// 59.7 68.7 115.4 162.1 tex2Dblur3x3resize -// 63.2 74.4 122.4 175.5 tex2Dblur5fast -// 49.3 54.8 100.0 132.7 tex2Dblur5resize -// 59.7 68.7 115.4 162.1 tex2Dblur5x5 -// 64.9 77.2 99.1 137.2 tex2Dblur6x6shared -// 55.8 63.7 110.4 151.8 tex2Dblur7fast -// 39.8 43.9 83.9 105.8 tex2Dblur7resize -// 40.0 44.2 83.2 104.9 tex2Dblur7x7 -// 56.4 65.5 71.9 87.9 tex2Dblur8x8shared -// 49.3 55.1 99.9 132.5 tex2Dblur9fast -// 33.3 36.2 72.4 88.0 tex2Dblur9resize -// 27.8 29.7 61.3 72.2 tex2Dblur9x9 -// 37.2 41.1 52.6 60.2 tex2Dblur10x10shared -// 44.4 49.5 91.3 117.8 tex2Dblur11fast -// 28.8 30.8 63.6 75.4 tex2Dblur11resize -// 33.6 36.5 40.9 45.5 tex2Dblur12x12shared -// TODO: Fill in benchmarks for new untested blurs. -// tex2Dblur17fast -// tex2Dblur25fast -// tex2Dblur31fast -// tex2Dblur43fast -// tex2Dblur3x3resize - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// Set static standard deviations, but allow users to override them with their -// own constants (even non-static uniforms if they're okay with the speed hit): -#ifndef OVERRIDE_BLUR_STD_DEVS - // blurN_std_dev values are specified in terms of dxdy strides. - #ifdef USE_BINOMIAL_BLUR_STD_DEVS - // By request, we can define standard deviations corresponding to a - // binomial distribution with p = 0.5 (related to Pascal's triangle). - // This distribution works such that blurring multiple times should - // have the same result as a single larger blur. These values are - // larger than default for blurs up to 6x and smaller thereafter. 
- static const float blur3_std_dev = 0.84931640625; - static const float blur4_std_dev = 0.84931640625; - static const float blur5_std_dev = 1.0595703125; - static const float blur6_std_dev = 1.06591796875; - static const float blur7_std_dev = 1.17041015625; - static const float blur8_std_dev = 1.1720703125; - static const float blur9_std_dev = 1.2259765625; - static const float blur10_std_dev = 1.21982421875; - static const float blur11_std_dev = 1.25361328125; - static const float blur12_std_dev = 1.2423828125; - static const float blur17_std_dev = 1.27783203125; - static const float blur25_std_dev = 1.2810546875; - static const float blur31_std_dev = 1.28125; - static const float blur43_std_dev = 1.28125; - #else - // The defaults are the largest values that keep the largest unused - // blur term on each side <= 1.0/256.0. (We could get away with more - // or be more conservative, but this compromise is pretty reasonable.) - static const float blur3_std_dev = 0.62666015625; - static const float blur4_std_dev = 0.66171875; - static const float blur5_std_dev = 0.9845703125; - static const float blur6_std_dev = 1.02626953125; - static const float blur7_std_dev = 1.36103515625; - static const float blur8_std_dev = 1.4080078125; - static const float blur9_std_dev = 1.7533203125; - static const float blur10_std_dev = 1.80478515625; - static const float blur11_std_dev = 2.15986328125; - static const float blur12_std_dev = 2.215234375; - static const float blur17_std_dev = 3.45535583496; - static const float blur25_std_dev = 5.3409576416; - static const float blur31_std_dev = 6.86488037109; - static const float blur43_std_dev = 10.1852050781; - #endif // USE_BINOMIAL_BLUR_STD_DEVS -#endif // OVERRIDE_BLUR_STD_DEVS - -#ifndef OVERRIDE_ERROR_BLURRING - // error_blurring should be in [0.0, 1.0]. Higher values reduce ringing - // in shared-sample blurs but increase blurring and feature shifting. 
- static const float error_blurring = 0.5; -#endif - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -// gamma-management.h relies on pass-specific settings to guide its behavior: -// FIRST_PASS, LAST_PASS, GAMMA_ENCODE_EVERY_FBO, etc. See it for details. -#include "gamma-management.fxh" -#include "quad-pixel-communication.fxh" -#include "special-functions.fxh" - - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float4 uv2_to_uv4(float2 tex_uv) -{ - // Make a float2 uv offset safe for adding to float4 tex2Dlod coords: - return float4(tex_uv, 0.0, 0.0); -} - -// Make a length squared helper macro (for usage with static constants): -#define LENGTH_SQ(vec) (dot(vec, vec)) - -float get_fast_gaussian_weight_sum_inv(const float sigma) -{ - // We can use the Gaussian integral to calculate the asymptotic weight for - // the center pixel. Since the unnormalized center pixel weight is 1.0, - // the normalized weight is the same as the weight sum inverse. Given a - // large enough blur (9+), the asymptotic weight sum is close and faster: - // center_weight = 0.5 * - // (erf(0.5/(sigma*sqrt(2.0))) - erf(-0.5/(sigma*sqrt(2.0)))) - // erf(-x) == -erf(x), so we get 0.5 * (2.0 * erf(blah blah)): - // However, we can get even faster results with curve-fitting. These are - // also closer than the asymptotic results, because they were constructed - // from 64 blurs sizes from [3, 131) and 255 equally-spaced sigmas from - // (0, blurN_std_dev), so the results for smaller sigmas are biased toward - // smaller blurs. The max error is 0.0031793913. - // Relative FPS: 134.3 with erf, 135.8 with curve-fitting. 
- //static const float temp = 0.5/sqrt(2.0); - //return erf(temp/sigma); - return min(exp(exp(0.348348412457428/ - (sigma - 0.0860587260734721))), 0.399334576340352/sigma); -} - - -//////////////////// ARBITRARILY RESIZABLE SEPARABLE BLURS /////////////////// - -float3 tex2Dblur11resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 11x Gaussian blurred texture lookup using a 11-tap blur. - // It may be mipmapped depending on settings and dxdy. - // Calculate Gaussian blur kernel weights and a normalization factor for - // distances of 0-4, ignoring constant factors (since we're normalizing). - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5)); - // Statically normalize weights, sum weighted samples, and return. Blurs are - // currently optimized for dynamic weights. 
- float3 sum = 0.0.xxx; - sum += w5 * tex2D_linearize(tex, tex_uv - 5.0 * dxdy).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv - 4.0 * dxdy).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv + 4.0 * dxdy).rgb; - sum += w5 * tex2D_linearize(tex, tex_uv + 5.0 * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur9resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 9x Gaussian blurred texture lookup using a 9-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w4 * tex2D_linearize(tex, tex_uv - 4.0 * dxdy).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy).rgb; - sum += w4 * tex2D_linearize(tex, tex_uv + 4.0 * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur7resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 7x Gaussian blurred texture lookup using a 7-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w3 * tex2D_linearize(tex, tex_uv - 3.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy).rgb; - sum += w3 * tex2D_linearize(tex, tex_uv + 3.0 * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur5resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). - // Returns: A 1D 5x Gaussian blurred texture lookup using a 5-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2)); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w2 * tex2D_linearize(tex, tex_uv - 2.0 * dxdy).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy).rgb; - sum += w2 * tex2D_linearize(tex, tex_uv + 2.0 * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). 
- // Returns: A 1D 3x Gaussian blurred texture lookup using a 3-tap blur. - // It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * w1); - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w1 * tex2D_linearize(tex, tex_uv - 1.0 * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1 * tex2D_linearize(tex, tex_uv + 1.0 * dxdy).rgb; - return sum * weight_sum_inv; -} - - -/////////////////////////// FAST SEPARABLE BLURS /////////////////////////// - -float3 tex2Dblur11fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: 1.) Global requirements must be met (see file description). - // 2.) filter_linearN must = "true" in your .cgp file. - // 3.) For gamma-correct bilinear filtering, global - // gamma_aware_bilinear == true (from gamma-management.h) - // Returns: A 1D 11x Gaussian blurred texture lookup using 6 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float weight_sum_inv = 1.0 / - (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5)); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w01 = w0 * 0.5 + w1; - const float w23 = w2 + w3; - const float w45 = w4 + w5; - const float w01_ratio = w1/w01; - const float w23_ratio = w3/w23; - const float w45_ratio = w5/w45; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w45 * tex2D_linearize(tex, tex_uv - (4.0 + w45_ratio) * dxdy).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv - (2.0 + w23_ratio) * dxdy).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv - w01_ratio * dxdy).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv + w01_ratio * dxdy).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv + (2.0 + w23_ratio) * dxdy).rgb; - sum += w45 * tex2D_linearize(tex, tex_uv + (4.0 + w45_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur9fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 9x Gaussian blurred texture lookup using 1 nearest - // neighbor and 4 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4)); - // Calculate combined weights and linear sample ratios between texel pairs. 
- const float w12 = w1 + w2; - const float w34 = w3 + w4; - const float w12_ratio = w2/w12; - const float w34_ratio = w4/w34; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w34 * tex2D_linearize(tex, tex_uv - (3.0 + w34_ratio) * dxdy).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv - (1.0 + w12_ratio) * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv + (1.0 + w12_ratio) * dxdy).rgb; - sum += w34 * tex2D_linearize(tex, tex_uv + (3.0 + w34_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur7fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 7x Gaussian blurred texture lookup using 4 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2 + w3)); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w01 = w0 * 0.5 + w1; - const float w23 = w2 + w3; - const float w01_ratio = w1/w01; - const float w23_ratio = w3/w23; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w23 * tex2D_linearize(tex, tex_uv - (2.0 + w23_ratio) * dxdy).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv - w01_ratio * dxdy).rgb; - sum += w01 * tex2D_linearize(tex, tex_uv + w01_ratio * dxdy).rgb; - sum += w23 * tex2D_linearize(tex, tex_uv + (2.0 + w23_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur5fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 5x Gaussian blurred texture lookup using 1 nearest - // neighbor and 2 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * (w1 + w2)); - // Calculate combined weights and linear sample ratios between texel pairs. - const float w12 = w1 + w2; - const float w12_ratio = w2/w12; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w12 * tex2D_linearize(tex, tex_uv - (1.0 + w12_ratio) * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w12 * tex2D_linearize(tex, tex_uv + (1.0 + w12_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur3fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 3x Gaussian blurred texture lookup using 2 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float weight_sum_inv = 1.0 / (w0 + 2.0 * w1); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. - const float w01 = w0 * 0.5 + w1; - const float w01_ratio = w1/w01; - // Weights for all samples are the same, so just average them: - return 0.5 * ( - tex2D_linearize(tex, tex_uv - w01_ratio * dxdy).rgb + - tex2D_linearize(tex, tex_uv + w01_ratio * dxdy).rgb); -} - - -//////////////////////////// HUGE SEPARABLE BLURS //////////////////////////// - -// Huge separable blurs come only in "fast" versions. -float3 tex2Dblur43fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 43x Gaussian blurred texture lookup using 22 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - const float w16 = exp(-256.0 * denom_inv); - const float w17 = exp(-289.0 * denom_inv); - const float w18 = exp(-324.0 * denom_inv); - const float w19 = exp(-361.0 * denom_inv); - const float w20 = exp(-400.0 * denom_inv); - const float w21 = exp(-441.0 * denom_inv); - //const float weight_sum_inv = 1.0 / - // (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + - // w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w0_1 = w0 * 0.5 + w1; - const float w2_3 = w2 + w3; - const float w4_5 = w4 + w5; - const float w6_7 = w6 + w7; - const float w8_9 = w8 + w9; - const float w10_11 = w10 + w11; - const float w12_13 = w12 + w13; - const float w14_15 = w14 + w15; - const float w16_17 = w16 + w17; - const float w18_19 = w18 + w19; - const float w20_21 = w20 + w21; - const float w0_1_ratio = w1/w0_1; - const float w2_3_ratio = w3/w2_3; - const float w4_5_ratio = w5/w4_5; - const float w6_7_ratio = w7/w6_7; - const float w8_9_ratio = w9/w8_9; - const float w10_11_ratio = w11/w10_11; - const float w12_13_ratio = w13/w12_13; - const float w14_15_ratio = w15/w14_15; - const float w16_17_ratio = w17/w16_17; - const float w18_19_ratio = w19/w18_19; - const float w20_21_ratio = w21/w20_21; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w20_21 * tex2D_linearize(tex, tex_uv - (20.0 + w20_21_ratio) * dxdy).rgb; - sum += w18_19 * tex2D_linearize(tex, tex_uv - (18.0 + w18_19_ratio) * dxdy).rgb; - sum += w16_17 * tex2D_linearize(tex, tex_uv - (16.0 + w16_17_ratio) * dxdy).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv - (14.0 + w14_15_ratio) * dxdy).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv - (12.0 + w12_13_ratio) * dxdy).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv - (10.0 + w10_11_ratio) * dxdy).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv - (8.0 + w8_9_ratio) * dxdy).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv - (6.0 + w6_7_ratio) * dxdy).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv - (4.0 + w4_5_ratio) * dxdy).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv - (2.0 + w2_3_ratio) * dxdy).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv - w0_1_ratio * dxdy).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv + w0_1_ratio * dxdy).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv + (2.0 + w2_3_ratio) * dxdy).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv + (4.0 + w4_5_ratio) * dxdy).rgb; - sum += w6_7 
* tex2D_linearize(tex, tex_uv + (6.0 + w6_7_ratio) * dxdy).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv + (8.0 + w8_9_ratio) * dxdy).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv + (10.0 + w10_11_ratio) * dxdy).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv + (12.0 + w12_13_ratio) * dxdy).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv + (14.0 + w14_15_ratio) * dxdy).rgb; - sum += w16_17 * tex2D_linearize(tex, tex_uv + (16.0 + w16_17_ratio) * dxdy).rgb; - sum += w18_19 * tex2D_linearize(tex, tex_uv + (18.0 + w18_19_ratio) * dxdy).rgb; - sum += w20_21 * tex2D_linearize(tex, tex_uv + (20.0 + w20_21_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur31fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 31x Gaussian blurred texture lookup using 16 linear - // taps. It may be mipmapped depending on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - const float w13 = exp(-169.0 * denom_inv); - const float w14 = exp(-196.0 * denom_inv); - const float w15 = exp(-225.0 * denom_inv); - //const float weight_sum_inv = 1.0 / - // (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + - // w9 + w10 + w11 + w12 + w13 + w14 + w15)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - // The center texel (with weight w0) is used twice, so halve its weight. 
- const float w0_1 = w0 * 0.5 + w1; - const float w2_3 = w2 + w3; - const float w4_5 = w4 + w5; - const float w6_7 = w6 + w7; - const float w8_9 = w8 + w9; - const float w10_11 = w10 + w11; - const float w12_13 = w12 + w13; - const float w14_15 = w14 + w15; - const float w0_1_ratio = w1/w0_1; - const float w2_3_ratio = w3/w2_3; - const float w4_5_ratio = w5/w4_5; - const float w6_7_ratio = w7/w6_7; - const float w8_9_ratio = w9/w8_9; - const float w10_11_ratio = w11/w10_11; - const float w12_13_ratio = w13/w12_13; - const float w14_15_ratio = w15/w14_15; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w14_15 * tex2D_linearize(tex, tex_uv - (14.0 + w14_15_ratio) * dxdy).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv - (12.0 + w12_13_ratio) * dxdy).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv - (10.0 + w10_11_ratio) * dxdy).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv - (8.0 + w8_9_ratio) * dxdy).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv - (6.0 + w6_7_ratio) * dxdy).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv - (4.0 + w4_5_ratio) * dxdy).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv - (2.0 + w2_3_ratio) * dxdy).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv - w0_1_ratio * dxdy).rgb; - sum += w0_1 * tex2D_linearize(tex, tex_uv + w0_1_ratio * dxdy).rgb; - sum += w2_3 * tex2D_linearize(tex, tex_uv + (2.0 + w2_3_ratio) * dxdy).rgb; - sum += w4_5 * tex2D_linearize(tex, tex_uv + (4.0 + w4_5_ratio) * dxdy).rgb; - sum += w6_7 * tex2D_linearize(tex, tex_uv + (6.0 + w6_7_ratio) * dxdy).rgb; - sum += w8_9 * tex2D_linearize(tex, tex_uv + (8.0 + w8_9_ratio) * dxdy).rgb; - sum += w10_11 * tex2D_linearize(tex, tex_uv + (10.0 + w10_11_ratio) * dxdy).rgb; - sum += w12_13 * tex2D_linearize(tex, tex_uv + (12.0 + w12_13_ratio) * dxdy).rgb; - sum += w14_15 * tex2D_linearize(tex, tex_uv + (14.0 + w14_15_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur25fast(const sampler2D tex, 
const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 25x Gaussian blurred texture lookup using 1 nearest - // neighbor and 12 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. - const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - const float w9 = exp(-81.0 * denom_inv); - const float w10 = exp(-100.0 * denom_inv); - const float w11 = exp(-121.0 * denom_inv); - const float w12 = exp(-144.0 * denom_inv); - //const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - // w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. 
- const float w1_2 = w1 + w2; - const float w3_4 = w3 + w4; - const float w5_6 = w5 + w6; - const float w7_8 = w7 + w8; - const float w9_10 = w9 + w10; - const float w11_12 = w11 + w12; - const float w1_2_ratio = w2/w1_2; - const float w3_4_ratio = w4/w3_4; - const float w5_6_ratio = w6/w5_6; - const float w7_8_ratio = w8/w7_8; - const float w9_10_ratio = w10/w9_10; - const float w11_12_ratio = w12/w11_12; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w11_12 * tex2D_linearize(tex, tex_uv - (11.0 + w11_12_ratio) * dxdy).rgb; - sum += w9_10 * tex2D_linearize(tex, tex_uv - (9.0 + w9_10_ratio) * dxdy).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv - (7.0 + w7_8_ratio) * dxdy).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv - (5.0 + w5_6_ratio) * dxdy).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv - (3.0 + w3_4_ratio) * dxdy).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv - (1.0 + w1_2_ratio) * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv + (1.0 + w1_2_ratio) * dxdy).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv + (3.0 + w3_4_ratio) * dxdy).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv + (5.0 + w5_6_ratio) * dxdy).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv + (7.0 + w7_8_ratio) * dxdy).rgb; - sum += w9_10 * tex2D_linearize(tex, tex_uv + (9.0 + w9_10_ratio) * dxdy).rgb; - sum += w11_12 * tex2D_linearize(tex, tex_uv + (11.0 + w11_12_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - -float3 tex2Dblur17fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Same as tex2Dblur11() - // Returns: A 1D 17x Gaussian blurred texture lookup using 1 nearest - // neighbor and 8 linear taps. It may be mipmapped depending - // on settings and dxdy. - // First get the texel weights and normalization factor as above. 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0 = 1.0; - const float w1 = exp(-1.0 * denom_inv); - const float w2 = exp(-4.0 * denom_inv); - const float w3 = exp(-9.0 * denom_inv); - const float w4 = exp(-16.0 * denom_inv); - const float w5 = exp(-25.0 * denom_inv); - const float w6 = exp(-36.0 * denom_inv); - const float w7 = exp(-49.0 * denom_inv); - const float w8 = exp(-64.0 * denom_inv); - //const float weight_sum_inv = 1.0 / (w0 + 2.0 * ( - // w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8)); - const float weight_sum_inv = get_fast_gaussian_weight_sum_inv(sigma); - // Calculate combined weights and linear sample ratios between texel pairs. - const float w1_2 = w1 + w2; - const float w3_4 = w3 + w4; - const float w5_6 = w5 + w6; - const float w7_8 = w7 + w8; - const float w1_2_ratio = w2/w1_2; - const float w3_4_ratio = w4/w3_4; - const float w5_6_ratio = w6/w5_6; - const float w7_8_ratio = w8/w7_8; - // Statically normalize weights, sum weighted samples, and return: - float3 sum = 0.0.xxx; - sum += w7_8 * tex2D_linearize(tex, tex_uv - (7.0 + w7_8_ratio) * dxdy).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv - (5.0 + w5_6_ratio) * dxdy).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv - (3.0 + w3_4_ratio) * dxdy).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv - (1.0 + w1_2_ratio) * dxdy).rgb; - sum += w0 * tex2D_linearize(tex, tex_uv).rgb; - sum += w1_2 * tex2D_linearize(tex, tex_uv + (1.0 + w1_2_ratio) * dxdy).rgb; - sum += w3_4 * tex2D_linearize(tex, tex_uv + (3.0 + w3_4_ratio) * dxdy).rgb; - sum += w5_6 * tex2D_linearize(tex, tex_uv + (5.0 + w5_6_ratio) * dxdy).rgb; - sum += w7_8 * tex2D_linearize(tex, tex_uv + (7.0 + w7_8_ratio) * dxdy).rgb; - return sum * weight_sum_inv; -} - - -//////////////////// ARBITRARILY RESIZABLE ONE-PASS BLURS //////////////////// - -float3 tex2Dblur3x3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Requires: Global requirements must be met (see file description). 
- // Returns: A 3x3 Gaussian blurred mipmapped texture lookup of the - // resized input. - // Description: - // This is the only arbitrarily resizable one-pass blur; tex2Dblur5x5resize - // would perform like tex2Dblur9x9, MUCH slower than tex2Dblur5resize. - const float denom_inv = 0.5/(sigma*sigma); - // Load each sample. We need all 3x3 samples. Quad-pixel communication - // won't help either: This should perform like tex2Dblur5x5, but sharing a - // 4x4 sample field would perform more like tex2Dblur8x8shared (worse). - const float2 sample4_uv = tex_uv; - const float2 dx = float2(dxdy.x, 0.0); - const float2 dy = float2(0.0, dxdy.y); - const float2 sample1_uv = sample4_uv - dy; - const float2 sample7_uv = sample4_uv + dy; - const float3 sample0 = tex2D_linearize(tex, sample1_uv - dx).rgb; - const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb; - const float3 sample2 = tex2D_linearize(tex, sample1_uv + dx).rgb; - const float3 sample3 = tex2D_linearize(tex, sample4_uv - dx).rgb; - const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb; - const float3 sample5 = tex2D_linearize(tex, sample4_uv + dx).rgb; - const float3 sample6 = tex2D_linearize(tex, sample7_uv - dx).rgb; - const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb; - const float3 sample8 = tex2D_linearize(tex, sample7_uv + dx).rgb; - // Statically compute Gaussian sample weights: - const float w4 = 1.0; - const float w1_3_5_7 = exp(-LENGTH_SQ(float2(1.0, 0.0)) * denom_inv); - const float w0_2_6_8 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float weight_sum_inv = 1.0/(w4 + 4.0 * (w1_3_5_7 + w0_2_6_8)); - // Weight and sum the samples: - const float3 sum = w4 * sample4 + - w1_3_5_7 * (sample1 + sample3 + sample5 + sample7) + - w0_2_6_8 * (sample0 + sample2 + sample6 + sample8); - return sum * weight_sum_inv; -} - - -//////////////////////////// FASTER ONE-PASS BLURS /////////////////////////// - -float3 tex2Dblur9x9(const sampler2D tex, const float2 tex_uv, - const float2 
dxdy, const float sigma) -{ - // Perform a 1-pass 9x9 blur with 5x5 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 9x9 Gaussian blurred mipmapped texture lookup composed of - // 5x5 carefully selected bilinear samples. - // Description: - // Perform a 1-pass 9x9 blur with 5x5 bilinear samples. Adjust the - // bilinear sample location to reflect the true Gaussian weights for each - // underlying texel. The following diagram illustrates the relative - // locations of bilinear samples. Each sample with the same number has the - // same weight (notice the symmetry). The letters a, b, c, d distinguish - // quadrants, and the letters U, D, L, R, C (up, down, left, right, center) - // distinguish 1D directions along the line containing the pixel center: - // 6a 5a 2U 5b 6b - // 4a 3a 1U 3b 4b - // 2L 1L 0C 1R 2R - // 4c 3c 1D 3d 4d - // 6c 5c 2D 5d 6d - // The following diagram illustrates the underlying equally spaced texels, - // named after the sample that accesses them and subnamed by their location - // within their 2x2, 2x1, 1x2, or 1x1 texel block: - // 6a4 6a3 5a4 5a3 2U2 5b3 5b4 6b3 6b4 - // 6a2 6a1 5a2 5a1 2U1 5b1 5b2 6b1 6b2 - // 4a4 4a3 3a4 3a3 1U2 3b3 3b4 4b3 4b4 - // 4a2 4a1 3a2 3a1 1U1 3b1 3b2 4b1 4b2 - // 2L2 2L1 1L2 1L1 0C1 1R1 1R2 2R1 2R2 - // 4c2 4c1 3c2 3c1 1D1 3d1 3d2 4d1 4d2 - // 4c4 4c3 3c4 3c3 1D2 3d3 3d4 4d3 4d4 - // 6c2 6c1 5c2 5c1 2D1 5d1 5d2 6d1 6d2 - // 6c4 6c3 5c4 5c3 2D2 5d3 5d4 6d3 6d4 - // Note there is only one C texel and only two texels for each U, D, L, or - // R sample. The center sample is effectively a nearest neighbor sample, - // and the U/D/L/R samples use 1D linear filtering. All other texels are - // read with bilinear samples somewhere within their 2x2 texel blocks. 
- - // COMPUTE TEXTURE COORDS: - // Statically compute sampling offsets within each 2x2 texel block, based - // on 1D sampling ratios between texels [1, 2] and [3, 4] texels away from - // the center, and reuse them independently for both dimensions. Compute - // these offsets based on the relative 1D Gaussian weights of the texels - // in question. (w1off means "Gaussian weight for the texel 1.0 texels - // away from the pixel center," etc.). - const float denom_inv = 0.5/(sigma*sigma); - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float w3off = exp(-9.0 * denom_inv); - const float w4off = exp(-16.0 * denom_inv); - const float texel1to2ratio = w2off/(w1off + w2off); - const float texel3to4ratio = w4off/(w3off + w4off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including x-axis-aligned: - const float2 sample1R_texel_offset = float2(1.0, 0.0) + float2(texel1to2ratio, 0.0); - const float2 sample2R_texel_offset = float2(3.0, 0.0) + float2(texel3to4ratio, 0.0); - const float2 sample3d_texel_offset = float2(1.0, 1.0) + float2(texel1to2ratio, texel1to2ratio); - const float2 sample4d_texel_offset = float2(3.0, 1.0) + float2(texel3to4ratio, texel1to2ratio); - const float2 sample5d_texel_offset = float2(1.0, 3.0) + float2(texel1to2ratio, texel3to4ratio); - const float2 sample6d_texel_offset = float2(3.0, 3.0) + float2(texel3to4ratio, texel3to4ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." 
- const float w1R1 = w1off; - const float w1R2 = w2off; - const float w2R1 = w3off; - const float w2R2 = w4off; - const float w3d1 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w3d2_3d3 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w3d4 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - const float w4d1_5d1 = exp(-LENGTH_SQ(float2(3.0, 1.0)) * denom_inv); - const float w4d2_5d3 = exp(-LENGTH_SQ(float2(4.0, 1.0)) * denom_inv); - const float w4d3_5d2 = exp(-LENGTH_SQ(float2(3.0, 2.0)) * denom_inv); - const float w4d4_5d4 = exp(-LENGTH_SQ(float2(4.0, 2.0)) * denom_inv); - const float w6d1 = exp(-LENGTH_SQ(float2(3.0, 3.0)) * denom_inv); - const float w6d2_6d3 = exp(-LENGTH_SQ(float2(4.0, 3.0)) * denom_inv); - const float w6d4 = exp(-LENGTH_SQ(float2(4.0, 4.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights: - const float w0 = 1.0; - const float w1 = w1R1 + w1R2; - const float w2 = w2R1 + w2R2; - const float w3 = w3d1 + 2.0 * w3d2_3d3 + w3d4; - const float w4 = w4d1_5d1 + w4d2_5d3 + w4d3_5d2 + w4d4_5d4; - const float w5 = w4; - const float w6 = w6d1 + 2.0 * w6d2_6d3 + w6d4; - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = - 1.0/(w0 + 4.0 * (w1 + w2 + w3 + w4 + w5 + w6)); - - // LOAD TEXTURE SAMPLES: - // Load all 25 samples (1 nearest, 8 linear, 16 bilinear) using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - // Sampling order doesn't seem to affect performance, so just be clear: - const float3 sample0C = tex2D_linearize(tex, tex_uv).rgb; - const float3 sample1R = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset).rgb; - const float3 sample1D = tex2D_linearize(tex, tex_uv + dxdy * 
sample1R_texel_offset.yx).rgb; - const float3 sample1L = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset).rgb; - const float3 sample1U = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset.yx).rgb; - const float3 sample2R = tex2D_linearize(tex, tex_uv + dxdy * sample2R_texel_offset).rgb; - const float3 sample2D = tex2D_linearize(tex, tex_uv + dxdy * sample2R_texel_offset.yx).rgb; - const float3 sample2L = tex2D_linearize(tex, tex_uv - dxdy * sample2R_texel_offset).rgb; - const float3 sample2U = tex2D_linearize(tex, tex_uv - dxdy * sample2R_texel_offset.yx).rgb; - const float3 sample3d = tex2D_linearize(tex, tex_uv + dxdy * sample3d_texel_offset).rgb; - const float3 sample3c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample3d_texel_offset).rgb; - const float3 sample3b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample3d_texel_offset).rgb; - const float3 sample3a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample3d_texel_offset).rgb; - const float3 sample4d = tex2D_linearize(tex, tex_uv + dxdy * sample4d_texel_offset).rgb; - const float3 sample4c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample4d_texel_offset).rgb; - const float3 sample4b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample4d_texel_offset).rgb; - const float3 sample4a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample4d_texel_offset).rgb; - const float3 sample5d = tex2D_linearize(tex, tex_uv + dxdy * sample5d_texel_offset).rgb; - const float3 sample5c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample5d_texel_offset).rgb; - const float3 sample5b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample5d_texel_offset).rgb; - const float3 sample5a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample5d_texel_offset).rgb; - const float3 sample6d = tex2D_linearize(tex, tex_uv + dxdy * sample6d_texel_offset).rgb; - const float3 sample6c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample6d_texel_offset).rgb; - const float3 sample6b = tex2D_linearize(tex, 
tex_uv + dxdy_mirror_y * sample6d_texel_offset).rgb; - const float3 sample6a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample6d_texel_offset).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. - float3 sum = w0 * sample0C; - sum += w1 * (sample1R + sample1D + sample1L + sample1U); - sum += w2 * (sample2R + sample2D + sample2L + sample2U); - sum += w3 * (sample3d + sample3c + sample3b + sample3a); - sum += w4 * (sample4d + sample4c + sample4b + sample4a); - sum += w5 * (sample5d + sample5c + sample5b + sample5a); - sum += w6 * (sample6d + sample6c + sample6b + sample6a); - return sum * weight_sum_inv; -} - -float3 tex2Dblur7x7(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Perform a 1-pass 7x7 blur with 4x4 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 7x7 Gaussian blurred mipmapped texture lookup composed of - // 4x4 carefully selected bilinear samples. - // Description: - // First see the descriptions for tex2Dblur9x9() and tex2Dblur7(). This - // blur mixes concepts from both. The sample layout is as follows: - // 4a 3a 3b 4b - // 2a 1a 1b 2b - // 2c 1c 1d 2d - // 4c 3c 3d 4d - // The texel layout is as follows. Note that samples 3a/3b, 1a/1b, 1c/1d, - // and 3c/3d share a vertical column of texels, and samples 2a/2c, 1a/1c, - // 1b/1d, and 2b/2d share a horizontal row of texels (all sample1's share - // the center texel): - // 4a4 4a3 3a4 3ab3 3b4 4b3 4b4 - // 4a2 4a1 3a2 3ab1 3b2 4b1 4b2 - // 2a4 2a3 1a4 1ab3 1b4 2b3 2b4 - // 2ac2 2ac1 1ac2 1* 1bd2 2bd1 2bd2 - // 2c4 2c3 1c4 1cd3 1d4 2d3 2d4 - // 4c2 4c1 3c2 3cd1 3d2 4d1 4d2 - // 4c4 4c3 3c4 3cd3 3d4 4d3 4d4 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float w3off = exp(-9.0 * denom_inv); - const float texel0to1ratio = w1off/(w0off * 0.5 + w1off); - const float texel2to3ratio = w3off/(w2off + w3off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including axis-aligned: - const float2 sample1d_texel_offset = float2(texel0to1ratio, texel0to1ratio); - const float2 sample2d_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample3d_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample4d_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." - const float w1abcd = 1.0; - const float w1bd2_1cd3 = exp(-LENGTH_SQ(float2(1.0, 0.0)) * denom_inv); - const float w2bd1_3cd1 = exp(-LENGTH_SQ(float2(2.0, 0.0)) * denom_inv); - const float w2bd2_3cd2 = exp(-LENGTH_SQ(float2(3.0, 0.0)) * denom_inv); - const float w1d4 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w2d3_3d2 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w2d4_3d4 = exp(-LENGTH_SQ(float2(3.0, 1.0)) * denom_inv); - const float w4d1 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - const float w4d2_4d3 = exp(-LENGTH_SQ(float2(3.0, 2.0)) * denom_inv); - const float w4d4 = exp(-LENGTH_SQ(float2(3.0, 3.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights. 
- // Split weights for shared texels between samples sharing them: - const float w1 = w1abcd * 0.25 + w1bd2_1cd3 + w1d4; - const float w2_3 = (w2bd1_3cd1 + w2bd2_3cd2) * 0.5 + w2d3_3d2 + w2d4_3d4; - const float w4 = w4d1 + 2.0 * w4d2_4d3 + w4d4; - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = - 1.0/(4.0 * (w1 + 2.0 * w2_3 + w4)); - - // LOAD TEXTURE SAMPLES: - // Load all 16 samples using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample1a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample1d_texel_offset).rgb; - const float3 sample2a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample2d_texel_offset).rgb; - const float3 sample3a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample3d_texel_offset).rgb; - const float3 sample4a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample4d_texel_offset).rgb; - const float3 sample1b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample1d_texel_offset).rgb; - const float3 sample2b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample2d_texel_offset).rgb; - const float3 sample3b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample3d_texel_offset).rgb; - const float3 sample4b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample4d_texel_offset).rgb; - const float3 sample1c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample1d_texel_offset).rgb; - const float3 sample2c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample2d_texel_offset).rgb; - const float3 sample3c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample3d_texel_offset).rgb; - const float3 sample4c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample4d_texel_offset).rgb; - const float3 sample1d = tex2D_linearize(tex, tex_uv + dxdy * 
sample1d_texel_offset).rgb; - const float3 sample2d = tex2D_linearize(tex, tex_uv + dxdy * sample2d_texel_offset).rgb; - const float3 sample3d = tex2D_linearize(tex, tex_uv + dxdy * sample3d_texel_offset).rgb; - const float3 sample4d = tex2D_linearize(tex, tex_uv + dxdy * sample4d_texel_offset).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. - float3 sum = 0.0.xxx; - sum += w1 * (sample1a + sample1b + sample1c + sample1d); - sum += w2_3 * (sample2a + sample2b + sample2c + sample2d); - sum += w2_3 * (sample3a + sample3b + sample3c + sample3d); - sum += w4 * (sample4a + sample4b + sample4c + sample4d); - return sum * weight_sum_inv; -} - -float3 tex2Dblur5x5(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Perform a 1-pass 5x5 blur with 3x3 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 5x5 Gaussian blurred mipmapped texture lookup composed of - // 3x3 carefully selected bilinear samples. - // Description: - // First see the description for tex2Dblur9x9(). This blur uses the same - // concept and sample/texel locations except on a smaller scale. Samples: - // 2a 1U 2b - // 1L 0C 1R - // 2c 1D 2d - // Texels: - // 2a4 2a3 1U2 2b3 2b4 - // 2a2 2a1 1U1 2b1 2b2 - // 1L2 1L1 0C1 1R1 1R2 - // 2c2 2c1 1D1 2d1 2d2 - // 2c4 2c3 1D2 2d3 2d4 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w1off = exp(-1.0 * denom_inv); - const float w2off = exp(-4.0 * denom_inv); - const float texel1to2ratio = w2off/(w1off + w2off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including x-axis-aligned: - const float2 sample1R_texel_offset = float2(1.0, 0.0) + float2(texel1to2ratio, 0.0); - const float2 sample2d_texel_offset = float2(1.0, 1.0) + float2(texel1to2ratio, texel1to2ratio); - - // CALCULATE KERNEL WEIGHTS FOR ALL SAMPLES: - // Statically compute Gaussian texel weights for the bottom-right quadrant. - // Read underscores as "and." - const float w1R1 = w1off; - const float w1R2 = w2off; - const float w2d1 = exp(-LENGTH_SQ(float2(1.0, 1.0)) * denom_inv); - const float w2d2_3 = exp(-LENGTH_SQ(float2(2.0, 1.0)) * denom_inv); - const float w2d4 = exp(-LENGTH_SQ(float2(2.0, 2.0)) * denom_inv); - // Statically add texel weights in each sample to get sample weights (w2d2_3 covers two equally weighted texels, so it counts twice): - const float w0 = 1.0; - const float w1 = w1R1 + w1R2; - const float w2 = w2d1 + 2.0 * w2d2_3 + w2d4; - // Get the weight sum inverse (normalization factor); w1 and w2 are each shared by four mirrored samples: - const float weight_sum_inv = 1.0/(w0 + 4.0 * (w1 + w2)); - - // LOAD TEXTURE SAMPLES: - // Load all 9 samples (1 nearest, 4 linear, 4 bilinear) using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample0C = tex2D_linearize(tex, tex_uv).rgb; - const float3 sample1R = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset).rgb; - const float3 sample1D = tex2D_linearize(tex, tex_uv + dxdy * sample1R_texel_offset.yx).rgb; - const float3 sample1L = tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset).rgb; - const float3 sample1U = 
tex2D_linearize(tex, tex_uv - dxdy * sample1R_texel_offset.yx).rgb; - const float3 sample2d = tex2D_linearize(tex, tex_uv + dxdy * sample2d_texel_offset).rgb; - const float3 sample2c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample2d_texel_offset).rgb; - const float3 sample2b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample2d_texel_offset).rgb; - const float3 sample2a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample2d_texel_offset).rgb; - - // SUM WEIGHTED SAMPLES: - // Statically normalize weights (so total = 1.0), and sum weighted samples. - float3 sum = w0 * sample0C; - sum += w1 * (sample1R + sample1D + sample1L + sample1U); - sum += w2 * (sample2a + sample2b + sample2c + sample2d); - return sum * weight_sum_inv; -} - -float3 tex2Dblur3x3(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float sigma) -{ - // Perform a 1-pass 3x3 blur with 2x2 bilinear samples. - // Requires: Same as tex2Dblur9() - // Returns: A 3x3 Gaussian blurred mipmapped texture lookup composed of - // 2x2 carefully selected bilinear samples. - // Description: - // First see the descriptions for tex2Dblur9x9() and tex2Dblur7(). This - // blur mixes concepts from both. The sample layout is as follows: - // 0a 0b - // 0c 0d - // The texel layout is as follows. Note that samples 0a/0b and 0c/0d share - // a vertical column of texels, and samples 0a/0c and 0b/0d share a - // horizontal row of texels (all samples share the center texel): - // 0a3 0ab2 0b3 - // 0ac1 0*0 0bd1 - // 0c3 0cd2 0d3 - - // COMPUTE TEXTURE COORDS: - // Statically compute bilinear sampling offsets (details in tex2Dblur9x9). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w1off = exp(-1.0 * denom_inv); - const float texel0to1ratio = w1off/(w0off * 0.5 + w1off); - // Statically compute texel offsets from the fragment center to each - // bilinear sample in the bottom-right quadrant, including axis-aligned: - const float2 sample0d_texel_offset = float2(texel0to1ratio, texel0to1ratio); - - // LOAD TEXTURE SAMPLES: - // Load all 4 samples using symmetry: - const float2 mirror_x = float2(-1.0, 1.0); - const float2 mirror_y = float2(1.0, -1.0); - const float2 mirror_xy = float2(-1.0, -1.0); - const float2 dxdy_mirror_x = dxdy * mirror_x; - const float2 dxdy_mirror_y = dxdy * mirror_y; - const float2 dxdy_mirror_xy = dxdy * mirror_xy; - const float3 sample0a = tex2D_linearize(tex, tex_uv + dxdy_mirror_xy * sample0d_texel_offset).rgb; - const float3 sample0b = tex2D_linearize(tex, tex_uv + dxdy_mirror_y * sample0d_texel_offset).rgb; - const float3 sample0c = tex2D_linearize(tex, tex_uv + dxdy_mirror_x * sample0d_texel_offset).rgb; - const float3 sample0d = tex2D_linearize(tex, tex_uv + dxdy * sample0d_texel_offset).rgb; - - // SUM WEIGHTED SAMPLES: - // Weights for all samples are the same, so just average them: - return 0.25 * (sample0a + sample0b + sample0c + sample0d); -} - - -////////////////// LINEAR ONE-PASS BLURS WITH SHARED SAMPLES ///////////////// - -float3 tex2Dblur12x12shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: 1.) Same as tex2Dblur9() - // 2.) ddx() and ddy() are present in the current Cg profile. - // 3.) The GPU driver is using fine/high-quality derivatives. - // 4.) quad_vector *correctly* describes the current fragment's - // location in its pixel quad, by the conventions noted in - // get_quad_vector[_naive]. - // 5.) 
tex_uv.w = log2(IN.video_size/IN.output_size).y - // 6.) tex2Dlod() is present in the current Cg profile. - // Optional: Tune artifacts vs. excessive blurriness with the global - // float error_blurring. - // Returns: A blurred texture lookup using a "virtual" 12x12 Gaussian - // blur (a 6x6 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // Perform a 1-pass blur with shared texture lookups across a pixel quad. - // We'll get neighboring samples with high-quality ddx/ddy derivatives, as - // in GPU Pro 2, Chapter VI.2, "Shader Amortization using Pixel Quad - // Message Passing" by Eric Penner. - // - // Our "virtual" 12x12 blur will be composed of (6^2)/4 + 3 = 12 - // bilinear samples per fragment, where bilinear sampling positions are computed from - // the relative Gaussian weights of the 4 surrounding texels. The catch is - // that the appropriate texel weights and sample coords differ for each - // fragment, but we're reusing most of the same samples across a quad of - // destination fragments. (We do use unique coords for the four nearest - // samples at each fragment.) Mixing bilinear filtering and sample-sharing - // therefore introduces some error into the weights, and this can get nasty - // when the source image is small or high-frequency. Computing bilinear - // ratios based on weights at the sample field center results in sharpening - // and ringing artifacts, but we can move samples closer to halfway between - // texels to try blurring away the error (which can move features around by - // a texel or so). Tune this with the global float "error_blurring". - // - // The pixel quad's sample field covers 12x12 texels, accessed through 6x6 - // bilinear (2x2 texel) taps. 
Each fragment depends on a window of 10x10 - // texels (5x5 bilinear taps), and each fragment is responsible for loading - // a 6x6 texel quadrant as a 3x3 block of bilinear taps, plus 3 more taps - // to use unique bilinear coords for sample0* for each fragment. This - // diagram illustrates the relative locations of bilinear samples 1-9 for - // each quadrant a, b, c, d (note samples will not be equally spaced): - // 8a 7a 6a 6b 7b 8b - // 5a 4a 3a 3b 4b 5b - // 2a 1a 0a 0b 1b 2b - // 2c 1c 0c 0d 1d 2d - // 5c 4c 3c 3d 4d 5d - // 8c 7c 6c 6d 7d 8d - // The following diagram illustrates the underlying equally spaced texels, - // named after the sample that accesses them and subnamed by their location - // within their 2x2 texel block: - // 8a3 8a2 7a3 7a2 6a3 6a2 6b2 6b3 7b2 7b3 8b2 8b3 - // 8a1 8a0 7a1 7a0 6a1 6a0 6b0 6b1 7b0 7b1 8b0 8b1 - // 5a3 5a2 4a3 4a2 3a3 3a2 3b2 3b3 4b2 4b3 5b2 5b3 - // 5a1 5a0 4a1 4a0 3a1 3a0 3b0 3b1 4b0 4b1 5b0 5b1 - // 2a3 2a2 1a3 1a2 0a3 0a2 0b2 0b3 1b2 1b3 2b2 2b3 - // 2a1 2a0 1a1 1a0 0a1 0a0 0b0 0b1 1b0 1b1 2b0 2b1 - // 2c1 2c0 1c1 1c0 0c1 0c0 0d0 0d1 1d0 1d1 2d0 2d1 - // 2c3 2c2 1c3 1c2 0c3 0c2 0d2 0d3 1d2 1d3 2d2 2d3 - // 5c1 5c0 4c1 4c0 3c1 3c0 3d0 3d1 4d0 4d1 5d0 5d1 - // 5c3 5c2 4c3 4c2 3c3 3c2 3d2 3d3 4d2 4d3 5d2 5d3 - // 8c1 8c0 7c1 7c0 6c1 6c0 6d0 6d1 7d0 7d1 8d0 8d1 - // 8c3 8c2 7c3 7c2 6c3 6c2 6d2 6d3 7d2 7d3 8d2 8d3 - // With this symmetric arrangement, we don't have to know which absolute - // quadrant a sample lies in to assign kernel weights; it's enough to know - // the sample number and the relative quadrant of the sample (relative to - // the current quadrant): - // {current, adjacent x, adjacent y, diagonal} - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute sampling offsets within each 2x2 texel block, based - // on appropriate 1D Gaussian sampling ratio between texels [0, 1], [2, 3], - // and [4, 5] away from the fragment, and reuse them independently for both - // 
dimensions. Use the sample field center as the estimated destination, - // but nudge the result closer to halfway between texels to blur error. - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float w4_5off = exp(-(4.5*4.5) * denom_inv); - const float w5_5off = exp(-(5.5*5.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - const float texel4to5ratio = lerp(w5_5off/(w4_5off + w5_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(4.0, 0.0) + float2(texel4to5ratio, texel0to1ratio); - const float2 sample3_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const 
float2 sample4_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - const float2 sample5_texel_offset = float2(4.0, 2.0) + float2(texel4to5ratio, texel2to3ratio); - const float2 sample6_texel_offset = float2(0.0, 4.0) + float2(texel0to1ratio, texel4to5ratio); - const float2 sample7_texel_offset = float2(2.0, 4.0) + float2(texel2to3ratio, texel4to5ratio); - const float2 sample8_texel_offset = float2(4.0, 4.0) + float2(texel4to5ratio, texel4to5ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // based on the sum of their 4 underlying texel weights. Assume a same- - // resolution blur, so each symmetrically named sample weight will compute - // the same at every fragment in the pixel quad: We can therefore compute - // texel weights based only on the bottom-right quadrant (fragment at 0d0). - // To avoid too much boilerplate code, use a macro to get all 4 texel - // weights for a bilinear sample based on the offset of its top-left texel: - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - const float w8diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -6.0); - const float w7diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -6.0); - const float w6diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -6.0); - const float w6adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -6.0); - const float w7adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -6.0); - const float w8adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -6.0); - const float w5diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -4.0); - const float w4diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w4adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w5adjy = 
GET_TEXEL_QUAD_WEIGHTS(4.0, -4.0); - const float w2diag = GET_TEXEL_QUAD_WEIGHTS(-6.0, -2.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -2.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 0.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 0.0); - const float w5adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 2.0); - const float w4adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w4curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - const float w5curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 2.0); - const float w8adjx = GET_TEXEL_QUAD_WEIGHTS(-6.0, 4.0); - const float w7adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 4.0); - const float w6adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 4.0); - const float w6curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 4.0); - const float w7curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 4.0); - const float w8curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 4.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Statically pack weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w2 = float4(w2curr, w2adjx, w2adjy, w2diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - const float4 w4 = float4(w4curr, w4adjx, w4adjy, w4diag); - const float4 w5 = float4(w5curr, w5adjx, w5adjy, w5diag); - const float4 w6 = float4(w6curr, w6adjx, w6adjy, w6diag); - const float4 w7 = float4(w7curr, w7adjx, w7adjy, w7diag); - const float4 w8 = 
float4(w8curr, w8adjx, w8adjy, w8diag); - // Get the weight sum inverse (normalization factor): - const float4 weight_sum4 = w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8; - const float2 weight_sum2 = weight_sum4.xy + weight_sum4.zw; - const float weight_sum = weight_sum2.x + weight_sum2.y; - const float weight_sum_inv = 1.0/(weight_sum); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset)).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset)).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset)).rgb; - const float3 sample4curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample4_texel_offset)).rgb; - const float3 sample5curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample5_texel_offset)).rgb; - const float3 sample6curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample6_texel_offset)).rgb; - const float3 sample7curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample7_texel_offset)).rgb; - const float3 sample8curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample8_texel_offset)).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples 
from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - float3 sample4adjx, sample4adjy, sample4diag; - float3 sample5adjx, sample5adjy, sample5diag; - float3 sample6adjx, sample6adjy, sample6diag; - float3 sample7adjx, sample7adjy, sample7diag; - float3 sample8adjx, sample8adjy, sample8diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - quad_gather(quad_vector, sample4curr, sample4adjx, sample4adjy, sample4diag); - quad_gather(quad_vector, sample5curr, sample5adjx, sample5adjy, sample5diag); - quad_gather(quad_vector, sample6curr, sample6adjx, sample6adjy, sample6diag); - quad_gather(quad_vector, sample7curr, sample7adjx, sample7adjy, sample7diag); - quad_gather(quad_vector, sample8curr, sample8adjx, sample8adjy, sample8diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. 
- // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result: - float3 sum = 0.0.xxx; - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w2, float4x3(sample2curr, sample2adjx, sample2adjy, sample2diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - sum += mul(w4, float4x3(sample4curr, sample4adjx, sample4adjy, sample4diag)); - sum += mul(w5, float4x3(sample5curr, sample5adjx, sample5adjy, sample5diag)); - sum += mul(w6, float4x3(sample6curr, sample6adjx, sample6adjy, sample6diag)); - sum += mul(w7, float4x3(sample7curr, sample7adjx, sample7adjy, sample7diag)); - sum += mul(w8, float4x3(sample8curr, sample8adjx, sample8adjy, sample8diag)); - return sum * weight_sum_inv; -} - -float3 tex2Dblur10x10shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 10x10 Gaussian - // blur (a 5x5 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur12x12shared(). This - // function shares the same concept and sample placement, but each fragment - // only uses 25 of the 36 samples taken across the pixel quad (to cover a - // 5x5 sample area, or 10x10 texel area), and it uses a lower standard - // deviation to compensate. 
Thanks to symmetry, the 11 omitted samples - // are always the "same:" - // 8adjx, 2adjx, 5adjx, - // 6adjy, 7adjy, 8adjy, - // 2diag, 5diag, 6diag, 7diag, 8diag - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float w4_5off = exp(-(4.5*4.5) * denom_inv); - const float w5_5off = exp(-(5.5*5.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - const float texel4to5ratio = lerp(w5_5off/(w4_5off + w5_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 
sample2_texel_offset = float2(4.0, 0.0) + float2(texel4to5ratio, texel0to1ratio); - const float2 sample3_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample4_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - const float2 sample5_texel_offset = float2(4.0, 2.0) + float2(texel4to5ratio, texel2to3ratio); - const float2 sample6_texel_offset = float2(0.0, 4.0) + float2(texel0to1ratio, texel4to5ratio); - const float2 sample7_texel_offset = float2(2.0, 4.0) + float2(texel2to3ratio, texel4to5ratio); - const float2 sample8_texel_offset = float2(4.0, 4.0) + float2(texel4to5ratio, texel4to5ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - // We only need 25 of the 36 sample weights. 
Skip the following weights: - // 8adjx, 2adjx, 5adjx, - // 6adjy, 7adjy, 8adjy, - // 2diag, 5diag, 6diag, 7diag, 8diag - const float w4diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w4adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w5adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -4.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(4.0, -2.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 0.0); - const float w4adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w4curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - const float w5curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 2.0); - const float w7adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 4.0); - const float w6adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 4.0); - const float w6curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 4.0); - const float w7curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 4.0); - const float w8curr = GET_TEXEL_QUAD_WEIGHTS(4.0, 4.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = 1.0/(w0curr + w1curr + w2curr + w3curr + - w4curr + w5curr + w6curr + w7curr + w8curr + - w0adjx + w1adjx + w3adjx + w4adjx + w6adjx + w7adjx + - w0adjy + w1adjy + w2adjy + w3adjy + w4adjy + w5adjy + - w0diag + w1diag + w3diag + w4diag); - // Statically pack most weights for runtime. 
Note the mixed packing: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - const float4 w4 = float4(w4curr, w4adjx, w4adjy, w4diag); - const float4 w2and5 = float4(w2curr, w2adjy, w5curr, w5adjy); - const float4 w6and7 = float4(w6curr, w6adjx, w7curr, w7adjx); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset)).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset)).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset)).rgb; - const float3 sample4curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample4_texel_offset)).rgb; - const float3 sample5curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample5_texel_offset)).rgb; - const float3 sample6curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample6_texel_offset)).rgb; - const float3 sample7curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample7_texel_offset)).rgb; - const float3 sample8curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample8_texel_offset)).rgb; - - // GATHER NEIGHBORING 
SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad in order of need: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - float3 sample4adjx, sample4adjy, sample4diag; - float3 sample5adjx, sample5adjy, sample5diag; - float3 sample6adjx, sample6adjy, sample6diag; - float3 sample7adjx, sample7adjy, sample7diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - quad_gather(quad_vector, sample4curr, sample4adjx, sample4adjy, sample4diag); - quad_gather(quad_vector, sample5curr, sample5adjx, sample5adjy, sample5diag); - quad_gather(quad_vector, sample6curr, sample6adjx, sample6adjy, sample6diag); - quad_gather(quad_vector, sample7curr, sample7adjx, sample7adjy, sample7diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. - // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result. 
First do the simple ones: - float3 sum = 0.0.xxx; - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - sum += mul(w4, float4x3(sample4curr, sample4adjx, sample4adjy, sample4diag)); - // Now do the mixed-sample ones: - sum += mul(w2and5, float4x3(sample2curr, sample2adjy, sample5curr, sample5adjy)); - sum += mul(w6and7, float4x3(sample6curr, sample6adjx, sample7curr, sample7adjx)); - sum += w8curr * sample8curr; - // Normalize the sum (so the weights add to 1.0) and return: - return sum * weight_sum_inv; -} - -float3 tex2Dblur8x8shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 8x8 Gaussian - // blur (a 4x4 blur of carefully selected bilinear samples) - // of the given mip level. There will be subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur12x12shared(). This function - // shares the same concept and a similar sample placement, except each - // quadrant contains 4x4 texels and 2x2 samples instead of 6x6 and 3x3 - // respectively. There could be a total of 16 samples, 4 of which each - // fragment is responsible for, but each fragment loads 0a/0b/0c/0d with - // its own offset to reduce shared sample artifacts, bringing the sample - // count for each fragment to 7. 
Sample placement: - // 3a 2a 2b 3b - // 1a 0a 0b 1b - // 1c 0c 0d 1d - // 3c 2c 2d 3d - // Texel placement: - // 3a3 3a2 2a3 2a2 2b2 2b3 3b2 3b3 - // 3a1 3a0 2a1 2a0 2b0 2b1 3b0 3b1 - // 1a3 1a2 0a3 0a2 0b2 0b3 1b2 1b3 - // 1a1 1a0 0a1 0a0 0b0 0b1 1b0 1b1 - // 1c1 1c0 0c1 0c0 0d0 0d1 1d0 1d1 - // 1c3 1c2 0c3 0c2 0d2 0d3 1d2 1d3 - // 3c1 3c0 2c1 2c0 2d0 2d1 3d0 3d1 - // 3c3 3c2 2c3 2c2 2d2 2d3 3d2 3d3 - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). - const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + 
float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample3_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). - #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - const float w3diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -4.0); - const float w2diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -4.0); - const float w2adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -4.0); - const float w3adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -4.0); - const float w1diag = GET_TEXEL_QUAD_WEIGHTS(-4.0, -2.0); - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w1adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 0.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w3adjx = GET_TEXEL_QUAD_WEIGHTS(-4.0, 2.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Statically pack weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - const float4 w1 = float4(w1curr, w1adjx, w1adjy, w1diag); - const float4 w2 = float4(w2curr, w2adjx, w2adjy, w2diag); - const float4 w3 = float4(w3curr, w3adjx, w3adjy, w3diag); - // Get the weight sum inverse (normalization 
factor): - const float4 weight_sum4 = w0 + w1 + w2 + w3; - const float2 weight_sum2 = weight_sum4.xy + weight_sum4.zw; - const float weight_sum = weight_sum2.x + weight_sum2.y; - const float weight_sum_inv = 1.0/(weight_sum); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample1_texel_offset)).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset)).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset)).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - float3 sample3adjx, sample3adjy, sample3diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - quad_gather(quad_vector, sample3curr, sample3adjx, sample3adjy, sample3diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. 
- // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result: - float3 sum = 0.0.xxx; - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += mul(w1, float4x3(sample1curr, sample1adjx, sample1adjy, sample1diag)); - sum += mul(w2, float4x3(sample2curr, sample2adjx, sample2adjy, sample2diag)); - sum += mul(w3, float4x3(sample3curr, sample3adjx, sample3adjy, sample3diag)); - return sum * weight_sum_inv; -} - -float3 tex2Dblur6x6shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector, - const float sigma) -{ - // Perform a 1-pass mipmapped blur with shared samples across a pixel quad. - // Requires: Same as tex2Dblur12x12shared() - // Returns: A blurred texture lookup using a "virtual" 6x6 Gaussian - // blur (a 3x3 blur of carefully selected bilinear samples) - // of the given mip level. There will be some subtle inaccuracies, - // especially for small or high-frequency detailed sources. - // Description: - // First see the description for tex2Dblur8x8shared(). This - // function shares the same concept and sample placement, but each fragment - // only uses 9 of the 16 samples taken across the pixel quad (to cover a - // 3x3 sample area, or 6x6 texel area), and it uses a lower standard - // deviation to compensate. Thanks to symmetry, the 7 omitted samples - // are always the "same:" - // 1adjx, 3adjx - // 2adjy, 3adjy - // 1diag, 2diag, 3diag - - // COMPUTE COORDS FOR TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Statically compute bilinear sampling offsets (details in tex2Dblur12x12shared). 
- const float denom_inv = 0.5/(sigma*sigma); - const float w0off = 1.0; - const float w0_5off = exp(-(0.5*0.5) * denom_inv); - const float w1off = exp(-(1.0*1.0) * denom_inv); - const float w1_5off = exp(-(1.5*1.5) * denom_inv); - const float w2off = exp(-(2.0*2.0) * denom_inv); - const float w2_5off = exp(-(2.5*2.5) * denom_inv); - const float w3_5off = exp(-(3.5*3.5) * denom_inv); - const float texel0to1ratio = lerp(w1_5off/(w0_5off + w1_5off), 0.5, error_blurring); - const float texel2to3ratio = lerp(w3_5off/(w2_5off + w3_5off), 0.5, error_blurring); - // We don't share sample0*, so use the nearest destination fragment: - const float texel0to1ratio_nearest = w1off/(w0off + w1off); - const float texel1to2ratio_nearest = w2off/(w1off + w2off); - // Statically compute texel offsets from the bottom-right fragment to each - // bilinear sample in the bottom-right quadrant: - const float2 sample0curr_texel_offset = float2(0.0, 0.0) + float2(texel0to1ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjx_texel_offset = float2(-1.0, 0.0) + float2(-texel1to2ratio_nearest, texel0to1ratio_nearest); - const float2 sample0adjy_texel_offset = float2(0.0, -1.0) + float2(texel0to1ratio_nearest, -texel1to2ratio_nearest); - const float2 sample0diag_texel_offset = float2(-1.0, -1.0) + float2(-texel1to2ratio_nearest, -texel1to2ratio_nearest); - const float2 sample1_texel_offset = float2(2.0, 0.0) + float2(texel2to3ratio, texel0to1ratio); - const float2 sample2_texel_offset = float2(0.0, 2.0) + float2(texel0to1ratio, texel2to3ratio); - const float2 sample3_texel_offset = float2(2.0, 2.0) + float2(texel2to3ratio, texel2to3ratio); - - // CALCULATE KERNEL WEIGHTS: - // Statically compute bilinear sample weights at each destination fragment - // from the sum of their 4 texel weights (details in tex2Dblur12x12shared). 
- #define GET_TEXEL_QUAD_WEIGHTS(xoff, yoff) \ - (exp(-LENGTH_SQ(float2(xoff, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff, yoff + 1.0)) * denom_inv) + \ - exp(-LENGTH_SQ(float2(xoff + 1.0, yoff + 1.0)) * denom_inv)) - // We only need 9 of the 16 sample weights. Skip the following weights: - // 1adjx, 3adjx - // 2adjy, 3adjy - // 1diag, 2diag, 3diag - const float w0diag = GET_TEXEL_QUAD_WEIGHTS(-2.0, -2.0); - const float w0adjy = GET_TEXEL_QUAD_WEIGHTS(0.0, -2.0); - const float w1adjy = GET_TEXEL_QUAD_WEIGHTS(2.0, -2.0); - const float w0adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 0.0); - const float w0curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 0.0); - const float w1curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 0.0); - const float w2adjx = GET_TEXEL_QUAD_WEIGHTS(-2.0, 2.0); - const float w2curr = GET_TEXEL_QUAD_WEIGHTS(0.0, 2.0); - const float w3curr = GET_TEXEL_QUAD_WEIGHTS(2.0, 2.0); - #undef GET_TEXEL_QUAD_WEIGHTS - // Get the weight sum inverse (normalization factor): - const float weight_sum_inv = 1.0/(w0curr + w1curr + w2curr + w3curr + - w0adjx + w2adjx + w0adjy + w1adjy + w0diag); - // Statically pack some weights for runtime: - const float4 w0 = float4(w0curr, w0adjx, w0adjy, w0diag); - - // LOAD TEXTURE SAMPLES THIS FRAGMENT IS RESPONSIBLE FOR: - // Get a uv vector from texel 0q0 of this quadrant to texel 0q3: - const float2 dxdy_curr = dxdy * quad_vector.xy; - // Load bilinear samples for the current quadrant (for this fragment): - const float3 sample0curr = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0curr_texel_offset).rgb; - const float3 sample0adjx = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjx_texel_offset).rgb; - const float3 sample0adjy = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0adjy_texel_offset).rgb; - const float3 sample0diag = tex2D_linearize(tex, tex_uv.xy + dxdy_curr * sample0diag_texel_offset).rgb; - const float3 sample1curr = tex2Dlod_linearize(tex, tex_uv + 
uv2_to_uv4(dxdy_curr * sample1_texel_offset)).rgb; - const float3 sample2curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample2_texel_offset)).rgb; - const float3 sample3curr = tex2Dlod_linearize(tex, tex_uv + uv2_to_uv4(dxdy_curr * sample3_texel_offset)).rgb; - - // GATHER NEIGHBORING SAMPLES AND SUM WEIGHTED SAMPLES: - // Fetch the samples from other fragments in the 2x2 quad: - float3 sample1adjx, sample1adjy, sample1diag; - float3 sample2adjx, sample2adjy, sample2diag; - quad_gather(quad_vector, sample1curr, sample1adjx, sample1adjy, sample1diag); - quad_gather(quad_vector, sample2curr, sample2adjx, sample2adjy, sample2diag); - // Statically normalize weights (so total = 1.0), and sum weighted samples. - // Fill each row of a matrix with an rgb sample and pre-multiply by the - // weights to obtain a weighted result for sample1*, and handle the rest - // of the weights more directly/verbosely: - float3 sum = 0.0.xxx; - sum += mul(w0, float4x3(sample0curr, sample0adjx, sample0adjy, sample0diag)); - sum += w1curr * sample1curr + w1adjy * sample1adjy + w2curr * sample2curr + - w2adjx * sample2adjx + w3curr * sample3curr; - return sum * weight_sum_inv; -} - - -/////////////////////// MAX OPTIMAL SIGMA BLUR WRAPPERS ////////////////////// - -// The following blurs are static wrappers around the dynamic blurs above. -// HOPEFULLY, the compiler will be smart enough to do constant-folding. 
- -// Resizable separable blurs: -float3 tex2Dblur11resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur11resize(tex, tex_uv, dxdy, blur11_std_dev); -} -float3 tex2Dblur9resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur9resize(tex, tex_uv, dxdy, blur9_std_dev); -} -float3 tex2Dblur7resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur7resize(tex, tex_uv, dxdy, blur7_std_dev); -} -float3 tex2Dblur5resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur5resize(tex, tex_uv, dxdy, blur5_std_dev); -} -float3 tex2Dblur3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur3resize(tex, tex_uv, dxdy, blur3_std_dev); -} -// Fast separable blurs: -float3 tex2Dblur11fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur11fast(tex, tex_uv, dxdy, blur11_std_dev); -} -float3 tex2Dblur9fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur9fast(tex, tex_uv, dxdy, blur9_std_dev); -} -float3 tex2Dblur7fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur7fast(tex, tex_uv, dxdy, blur7_std_dev); -} -float3 tex2Dblur5fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur5fast(tex, tex_uv, dxdy, blur5_std_dev); -} -float3 tex2Dblur3fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur3fast(tex, tex_uv, dxdy, blur3_std_dev); -} -// Huge, "fast" separable blurs: -float3 tex2Dblur43fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur43fast(tex, tex_uv, dxdy, blur43_std_dev); -} -float3 tex2Dblur31fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur31fast(tex, tex_uv, dxdy, blur31_std_dev); -} -float3 tex2Dblur25fast(const sampler2D tex, const float2 
tex_uv, - const float2 dxdy) -{ - return tex2Dblur25fast(tex, tex_uv, dxdy, blur25_std_dev); -} -float3 tex2Dblur17fast(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur17fast(tex, tex_uv, dxdy, blur17_std_dev); -} -// Resizable one-pass blurs: -float3 tex2Dblur3x3resize(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur3x3resize(tex, tex_uv, dxdy, blur3_std_dev); -} -// "Fast" one-pass blurs: -float3 tex2Dblur9x9(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur9x9(tex, tex_uv, dxdy, blur9_std_dev); -} -float3 tex2Dblur7x7(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur7x7(tex, tex_uv, dxdy, blur7_std_dev); -} -float3 tex2Dblur5x5(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur5x5(tex, tex_uv, dxdy, blur5_std_dev); -} -float3 tex2Dblur3x3(const sampler2D tex, const float2 tex_uv, - const float2 dxdy) -{ - return tex2Dblur3x3(tex, tex_uv, dxdy, blur3_std_dev); -} -// "Fast" shared-sample one-pass blurs: -float3 tex2Dblur12x12shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector) -{ - return tex2Dblur12x12shared(tex, tex_uv, dxdy, quad_vector, blur12_std_dev); -} -float3 tex2Dblur10x10shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector) -{ - return tex2Dblur10x10shared(tex, tex_uv, dxdy, quad_vector, blur10_std_dev); -} -float3 tex2Dblur8x8shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector) -{ - return tex2Dblur8x8shared(tex, tex_uv, dxdy, quad_vector, blur8_std_dev); -} -float3 tex2Dblur6x6shared(const sampler2D tex, - const float4 tex_uv, const float2 dxdy, const float4 quad_vector) -{ - return tex2Dblur6x6shared(tex, tex_uv, dxdy, quad_vector, blur6_std_dev); -} - - -#endif // BLUR_FUNCTIONS_H - diff --git 
a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/derived-settings-and-constants.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/derived-settings-and-constants.fxh deleted file mode 100644 index 95f4a8cde..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/derived-settings-and-constants.fxh +++ /dev/null @@ -1,299 +0,0 @@ -#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H -#define DERIVED_SETTINGS_AND_CONSTANTS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// These macros and constants can be used across the whole codebase. -// Unlike the values in user-settings.cgh, end users shouldn't modify these. 
- - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "user-settings.fxh" -#include "user-cgp-constants.fxh" - - -/////////////////////////////// FIXED SETTINGS /////////////////////////////// - -// Avoid dividing by zero; using a macro overloads for float, float2, etc.: -//#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16 - -// Ensure the first pass decodes CRT gamma and the last encodes LCD gamma. -#ifndef SIMULATE_CRT_ON_LCD - #define SIMULATE_CRT_ON_LCD -#endif - -// Manually tiling a manually resized texture creates texture coord derivative -// discontinuities and confuses anisotropic filtering, causing discolored tile -// seams in the phosphor mask. Workarounds: -// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's -// downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and -// disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either. -// b.) "Tile flat twice" requires drawing two full tiles without border padding -// to the resized mask FBO, and it's incompatible with same-pass curvature. -// (Same-pass curvature isn't used but could be in the future...maybe.) -// c.) "Fix discontinuities" requires derivatives and drawing one tile with -// border padding to the resized mask FBO, but it works with same-pass -// curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined. -// Precedence: a, then, b, then c (if multiple strategies are #defined). - #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD // 129.7 FPS, 4x, flat; 101.8 at fullscreen - #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // 128.1 FPS, 4x, flat; 101.5 at fullscreen - #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES // 124.4 FPS, 4x, flat; 97.4 at fullscreen -// Also, manually resampling the phosphor mask is slightly blurrier with -// anisotropic filtering. (Resampling with mipmapping is even worse: It -// creates artifacts, but only with the fully bloomed shader.) 
The difference -// is subtle with small triads, but you can fix it for a small cost. - //#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - - -////////////////////////////// DERIVED SETTINGS ////////////////////////////// - -// Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the -// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable -// incompatible settings ASAP. (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be -// #defined by either user-settings.h or a wrapper .cg that #includes the -// current .cg pass.) -#ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - #undef PHOSPHOR_MASK_MANUALLY_RESIZE - #endif - #ifdef RUNTIME_GEOMETRY_MODE - #undef RUNTIME_GEOMETRY_MODE - #endif - // Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is - // inferior in most cases, so replace 2.0 with 0.0: - static const float bloom_approx_filter = - bloom_approx_filter_static > 1.5 ? 0.0 : bloom_approx_filter_static; -#else - static const float bloom_approx_filter = bloom_approx_filter_static; -#endif - -// Disable slow runtime paths if static parameters are used. Most of these -// won't be a problem anyway once the params are disabled, but some will. -#ifndef RUNTIME_SHADER_PARAMS_ENABLE - #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA - #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA - #endif - #ifdef RUNTIME_ANTIALIAS_WEIGHTS - #undef RUNTIME_ANTIALIAS_WEIGHTS - #endif - #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS - #endif - #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #endif - #ifdef RUNTIME_GEOMETRY_TILT - #undef RUNTIME_GEOMETRY_TILT - #endif - #ifdef RUNTIME_GEOMETRY_MODE - #undef RUNTIME_GEOMETRY_MODE - #endif - #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - #endif -#endif - -// Make tex2Dbias a backup for tex2Dlod for wider compatibility. 
-#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS -#endif -#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS -#endif -// Rule out unavailable anisotropic compatibility strategies: -#ifndef DRIVERS_ALLOW_DERIVATIVES - #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #endif -#endif -#ifndef DRIVERS_ALLOW_TEX2DLOD - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - #endif - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - #endif - #ifdef ANTIALIAS_DISABLE_ANISOTROPIC - #undef ANTIALIAS_DISABLE_ANISOTROPIC - #endif -#endif -#ifndef DRIVERS_ALLOW_TEX2DBIAS - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #endif - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - #endif -#endif -// Prioritize anisotropic tiling compatibility strategies by performance and -// disable unused strategies. This concentrates all the nesting in one place. -#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #endif - #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - #endif - #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #endif -#else - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - #endif - #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #endif - #else - // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with - // flat texture coords in the same pass, but that's all we use. 
- #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - #endif - #endif - #endif -#endif -// The tex2Dlod and tex2Dbias strategies share a lot in common, and we can -// reduce some #ifdef nesting in the next section by essentially OR'ing them: -#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY -#endif -#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY -#endif -// Prioritize anisotropic resampling compatibility strategies the same way: -#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - #endif -#endif - - -/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS ////////////////////// - -// If we can use the large mipmapped LUT without mipmapping artifacts, we -// should: It gives us more options for using fewer samples. -#ifdef DRIVERS_ALLOW_TEX2DLOD - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - // TODO: Take advantage of this! - #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT - static const float2 mask_resize_src_lut_size = mask_texture_large_size; - #else - static const float2 mask_resize_src_lut_size = mask_texture_small_size; - #endif -#else - static const float2 mask_resize_src_lut_size = mask_texture_small_size; -#endif - - -// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to -// main_fragment, or a static alias of one of the above. This makes it hard -// to select the phosphor mask at runtime: We can't even assign to a uniform -// global in the vertex shader or select a sampler2D in the vertex shader and -// pass it to the fragment shader (even with explicit TEXUNIT# bindings), -// because it just gives us the input texture or a black screen. 
However, we -// can get around these limitations by calling tex2D three times with different -// uniform samplers (or resizing the phosphor mask three times altogether). -// With dynamic branches, we can process only one of these branches on top of -// quickly discarding fragments we don't need (cgc seems able to overcome -// limigations around dependent texture fetches inside of branches). Without -// dynamic branches, we have to process every branch for every fragment...which -// is slower. Runtime sampling mode selection is slower without dynamic -// branches as well. Let the user's static #defines decide if it's worth it. -#ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT -#else - #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - #endif -#endif - -// We need to render some minimum number of tiles in the resize passes. -// We need at least 1.0 just to repeat a single tile, and we need extra -// padding beyond that for anisotropic filtering, discontinuitity fixing, -// antialiasing, same-pass curvature (not currently used), etc. 
First -// determine how many border texels and tiles we need, based on how the result -// will be sampled: -#ifdef GEOMETRY_EARLY - static const float max_subpixel_offset = aa_subpixel_r_offset_static.x; - // Most antialiasing filters have a base radius of 4.0 pixels: - static const float max_aa_base_pixel_border = 4.0 + - max_subpixel_offset; -#else - static const float max_aa_base_pixel_border = 0.0; -#endif -// Anisotropic filtering adds about 0.5 to the pixel border: -#ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY - static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5; -#else - static const float max_aniso_pixel_border = max_aa_base_pixel_border; -#endif -// Fixing discontinuities adds 1.0 more to the pixel border: -#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0; -#else - static const float max_tiled_pixel_border = max_aniso_pixel_border; -#endif -// Convert the pixel border to an integer texel border. Assume same-pass -// curvature about triples the texel frequency: -#ifdef GEOMETRY_EARLY - static const float max_mask_texel_border = - macro_ceil(max_tiled_pixel_border * 3.0); -#else - static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border); -#endif -// Convert the texel border to a tile border using worst-case assumptions: -static const float max_mask_tile_border = max_mask_texel_border/ - (mask_min_allowed_triad_size * mask_triads_per_tile); - -// Finally, set the number of resized tiles to render to MASK_RESIZE, and set -// the starting texel (inside borders) for sampling it. -#ifndef GEOMETRY_EARLY - #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - // Special case: Render two tiles without borders. Anisotropic - // filtering doesn't seem to be a problem here. 
- static const float mask_resize_num_tiles = 1.0 + 1.0; - static const float mask_start_texels = 0.0; - #else - static const float mask_resize_num_tiles = 1.0 + - 2.0 * max_mask_tile_border; - static const float mask_start_texels = max_mask_texel_border; - #endif -#else - static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border; - static const float mask_start_texels = max_mask_texel_border; -#endif - -// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of -// mask_resize_viewport_scale. This limits the maximum final triad size. -// Estimate the minimum number of triads we can split the screen into in each -// dimension (we'll be as correct as mask_resize_viewport_scale is): -static const float mask_resize_num_triads = - mask_resize_num_tiles * mask_triads_per_tile; -static const float2 min_allowed_viewport_triads = - mask_resize_num_triads.xx / mask_resize_viewport_scale; - -#endif // DERIVED_SETTINGS_AND_CONSTANTS_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/gamma-management.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/gamma-management.fxh deleted file mode 100644 index 83ef1db46..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/gamma-management.fxh +++ /dev/null @@ -1,545 +0,0 @@ -#ifndef GAMMA_MANAGEMENT_H -#define GAMMA_MANAGEMENT_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice 
and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file provides gamma-aware tex*D*() and encode_output() functions. -// Requires: Before #include-ing this file, the including file must #define -// the following macros when applicable and follow their rules: -// 1.) #define FIRST_PASS if this is the first pass. -// 2.) #define LAST_PASS if this is the last pass. -// 3.) If sRGB is available, set srgb_framebufferN = "true" for -// every pass except the last in your .cgp preset. -// 4.) If sRGB isn't available but you want gamma-correctness with -// no banding, #define GAMMA_ENCODE_EVERY_FBO each pass. -// 5.) #define SIMULATE_CRT_ON_LCD if desired (precedence over 5-7) -// 6.) #define SIMULATE_GBA_ON_LCD if desired (precedence over 6-7) -// 7.) #define SIMULATE_LCD_ON_CRT if desired (precedence over 7) -// 8.) #define SIMULATE_GBA_ON_CRT if desired (precedence over -) -// If an option in [5, 8] is #defined in the first or last pass, it -// should be #defined for both. It shouldn't make a difference -// whether it's #defined for intermediate passes or not. -// Optional: The including file (or an earlier included file) may optionally -// #define a number of macros indicating it will override certain -// macros and associated constants are as follows: -// static constants with either static or uniform constants. The -// 1.) 
OVERRIDE_STANDARD_GAMMA: The user must first define: -// static const float ntsc_gamma -// static const float pal_gamma -// static const float crt_reference_gamma_high -// static const float crt_reference_gamma_low -// static const float lcd_reference_gamma -// static const float crt_office_gamma -// static const float lcd_office_gamma -// 2.) OVERRIDE_DEVICE_GAMMA: The user must first define: -// static const float crt_gamma -// static const float gba_gamma -// static const float lcd_gamma -// 3.) OVERRIDE_FINAL_GAMMA: The user must first define: -// static const float input_gamma -// static const float intermediate_gamma -// static const float output_gamma -// (intermediate_gamma is for GAMMA_ENCODE_EVERY_FBO.) -// 4.) OVERRIDE_ALPHA_ASSUMPTIONS: The user must first define: -// static const bool assume_opaque_alpha -// The gamma constant overrides must be used in every pass or none, -// and OVERRIDE_FINAL_GAMMA bypasses all of the SIMULATE* macros. -// OVERRIDE_ALPHA_ASSUMPTIONS may be set on a per-pass basis. -// Usage: After setting macros appropriately, ignore gamma correction and -// replace all tex*D*() calls with equivalent gamma-aware -// tex*D*_linearize calls, except: -// 1.) When you read an LUT, use regular tex*D or a gamma-specified -// function, depending on its gamma encoding: -// tex*D*_linearize_gamma (takes a runtime gamma parameter) -// 2.) If you must read pass0's original input in a later pass, use -// tex2D_linearize_ntsc_gamma. If you want to read pass0's -// input with gamma-corrected bilinear filtering, consider -// creating a first linearizing pass and reading from the input -// of pass1 later. -// Then, return encode_output(color) from every fragment shader. -// Finally, use the global gamma_aware_bilinear boolean if you want -// to statically branch based on whether bilinear filtering is -// gamma-correct or not (e.g. for placing Gaussian blur samples). 
-// -// Detailed Policy: -// tex*D*_linearize() functions enforce a consistent gamma-management policy -// based on the FIRST_PASS and GAMMA_ENCODE_EVERY_FBO settings. They assume -// their input texture has the same encoding characteristics as the input for -// the current pass (which doesn't apply to the exceptions listed above). -// Similarly, encode_output() enforces a policy based on the LAST_PASS and -// GAMMA_ENCODE_EVERY_FBO settings. Together, they result in one of the -// following two pipelines. -// Typical pipeline with intermediate sRGB framebuffers: -// linear_color = pow(pass0_encoded_color, input_gamma); -// intermediate_output = linear_color; // Automatic sRGB encoding -// linear_color = intermediate_output; // Automatic sRGB decoding -// final_output = pow(intermediate_output, 1.0/output_gamma); -// Typical pipeline without intermediate sRGB framebuffers: -// linear_color = pow(pass0_encoded_color, input_gamma); -// intermediate_output = pow(linear_color, 1.0/intermediate_gamma); -// linear_color = pow(intermediate_output, intermediate_gamma); -// final_output = pow(intermediate_output, 1.0/output_gamma); -// Using GAMMA_ENCODE_EVERY_FBO is much slower, but it's provided as a way to -// easily get gamma-correctness without banding on devices where sRGB isn't -// supported. -// -// Use This Header to Maximize Code Reuse: -// The purpose of this header is to provide a consistent interface for texture -// reads and output gamma-encoding that localizes and abstracts away all the -// annoying details. This greatly reduces the amount of code in each shader -// pass that depends on the pass number in the .cgp preset or whether sRGB -// FBO's are being used: You can trivially change the gamma behavior of your -// whole pass by commenting or uncommenting 1-3 #defines. 
To reuse the same -// code in your first, Nth, and last passes, you can even put it all in another -// header file and #include it from skeleton .cg files that #define the -// appropriate pass-specific settings. -// -// Rationale for Using Three Macros: -// This file uses GAMMA_ENCODE_EVERY_FBO instead of an opposite macro like -// SRGB_PIPELINE to ensure sRGB is assumed by default, which hopefully imposes -// a lower maintenance burden on each pass. At first glance it seems we could -// accomplish everything with two macros: GAMMA_CORRECT_IN / GAMMA_CORRECT_OUT. -// This works for simple use cases where input_gamma == output_gamma, but it -// breaks down for more complex scenarios like CRT simulation, where the pass -// number determines the gamma encoding of the input and output. - - -/////////////////////////////// BASE CONSTANTS /////////////////////////////// - -// Set standard gamma constants, but allow users to override them: -#ifndef OVERRIDE_STANDARD_GAMMA - // Standard encoding gammas: - static const float ntsc_gamma = 2.2; // Best to use NTSC for PAL too? - static const float pal_gamma = 2.8; // Never actually 2.8 in practice - // Typical device decoding gammas (only use for emulating devices): - // CRT/LCD reference gammas are higher than NTSC and Rec.709 video standard - // gammas: The standards purposely undercorrected for an analog CRT's - // assumed 2.5 reference display gamma to maintain contrast in assumed - // [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf - // These unstated assumptions about display gamma and perceptual rendering - // intent caused a lot of confusion, and more modern CRT's seemed to target - // NTSC 2.2 gamma with circuitry. LCD displays seem to have followed suit - // (they struggle near black with 2.5 gamma anyway), especially PC/laptop - // displays designed to view sRGB in bright environments. (Standards are - // also in flux again with BT.1886, but it's underspecified for displays.) 
- static const float crt_reference_gamma_high = 2.5; // In (2.35, 2.55) - static const float crt_reference_gamma_low = 2.35; // In (2.35, 2.55) - static const float lcd_reference_gamma = 2.5; // To match CRT - static const float crt_office_gamma = 2.2; // Circuitry-adjusted for NTSC - static const float lcd_office_gamma = 2.2; // Approximates sRGB -#endif // OVERRIDE_STANDARD_GAMMA - -// Assuming alpha == 1.0 might make it easier for users to avoid some bugs, -// but only if they're aware of it. -#ifndef OVERRIDE_ALPHA_ASSUMPTIONS - static const bool assume_opaque_alpha = false; -#endif - - -/////////////////////// DERIVED CONSTANTS AS FUNCTIONS /////////////////////// - -// gamma-management.h should be compatible with overriding gamma values with -// runtime user parameters, but we can only define other global constants in -// terms of static constants, not uniform user parameters. To get around this -// limitation, we need to define derived constants using functions. - -// Set device gamma constants, but allow users to override them: -#ifdef OVERRIDE_DEVICE_GAMMA - // The user promises to globally define the appropriate constants: - float get_crt_gamma() { return crt_gamma; } - float get_gba_gamma() { return gba_gamma; } - float get_lcd_gamma() { return lcd_gamma; } -#else - float get_crt_gamma() { return crt_reference_gamma_high; } - float get_gba_gamma() { return 3.5; } // Game Boy Advance; in (3.0, 4.0) - float get_lcd_gamma() { return lcd_office_gamma; } -#endif // OVERRIDE_DEVICE_GAMMA - -// Set decoding/encoding gammas for the first/lass passes, but allow overrides: -#ifdef OVERRIDE_FINAL_GAMMA - // The user promises to globally define the appropriate constants: - float get_intermediate_gamma() { return intermediate_gamma; } - float get_input_gamma() { return input_gamma; } - float get_output_gamma() { return output_gamma; } -#else - // If we gamma-correct every pass, always use ntsc_gamma between passes to - // ensure middle passes don't need to care if 
anything is being simulated: - float get_intermediate_gamma() { return ntsc_gamma; } - #ifdef SIMULATE_CRT_ON_LCD - float get_input_gamma() { return get_crt_gamma(); } - float get_output_gamma() { return get_lcd_gamma(); } - #else - #ifdef SIMULATE_GBA_ON_LCD - float get_input_gamma() { return get_gba_gamma(); } - float get_output_gamma() { return get_lcd_gamma(); } - #else - #ifdef SIMULATE_LCD_ON_CRT - float get_input_gamma() { return get_lcd_gamma(); } - float get_output_gamma() { return get_crt_gamma(); } - #else - #ifdef SIMULATE_GBA_ON_CRT - float get_input_gamma() { return get_gba_gamma(); } - float get_output_gamma() { return get_crt_gamma(); } - #else // Don't simulate anything: - float get_input_gamma() { return ntsc_gamma; } - float get_output_gamma() { return ntsc_gamma; } - #endif // SIMULATE_GBA_ON_CRT - #endif // SIMULATE_LCD_ON_CRT - #endif // SIMULATE_GBA_ON_LCD - #endif // SIMULATE_CRT_ON_LCD -#endif // OVERRIDE_FINAL_GAMMA - -// Set decoding/encoding gammas for the current pass. Use static constants for -// linearize_input and gamma_encode_output, because they aren't derived, and -// they let the compiler do dead-code elimination. 
-#ifndef GAMMA_ENCODE_EVERY_FBO - #ifdef FIRST_PASS - static const bool linearize_input = true; - float get_pass_input_gamma() { return get_input_gamma(); } - #else - static const bool linearize_input = false; - float get_pass_input_gamma() { return 1.0; } - #endif - #ifdef LAST_PASS - static const bool gamma_encode_output = true; - float get_pass_output_gamma() { return get_output_gamma(); } - #else - static const bool gamma_encode_output = false; - float get_pass_output_gamma() { return 1.0; } - #endif -#else - static const bool linearize_input = true; - static const bool gamma_encode_output = true; - #ifdef FIRST_PASS - float get_pass_input_gamma() { return get_input_gamma(); } - #else - float get_pass_input_gamma() { return get_intermediate_gamma(); } - #endif - #ifdef LAST_PASS - float get_pass_output_gamma() { return get_output_gamma(); } - #else - float get_pass_output_gamma() { return get_intermediate_gamma(); } - #endif -#endif - -// Users might want to know if bilinear filtering will be gamma-correct: -static const bool gamma_aware_bilinear = !linearize_input; - - -////////////////////// COLOR ENCODING/DECODING FUNCTIONS ///////////////////// - -float4 encode_output(const float4 color) -{ - if(gamma_encode_output) - { - if(assume_opaque_alpha) - { - return float4(pow(color.rgb, 1.0/get_pass_output_gamma()), 1.0); - } - else - { - return float4(pow(color.rgb, 1.0/get_pass_output_gamma()), color.a); - } - } - else - { - return color; - } -} - -float4 decode_input(const float4 color) -{ - return color; -} - -float4 decode_input_first(const float4 color) -{ - if(assume_opaque_alpha) - { - return float4(pow(color.rgb, get_input_gamma()), 1.0); - } - else - { - return float4(pow(color.rgb, get_input_gamma()), color.a); - } -} - - -float4 decode_gamma_input(const float4 color, const float3 gamma) -{ - if(assume_opaque_alpha) - { - return float4(pow(color.rgb, gamma), 1.0); - } - else - { - return float4(pow(color.rgb, gamma), color.a); - } -} - - 
-/////////////////////////// TEXTURE LOOKUP WRAPPERS ////////////////////////// - -// "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS: -// Provide a wide array of linearizing texture lookup wrapper functions. The -// Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D -// lookups are provided for completeness in case that changes someday. Nobody -// is likely to use the *fetch and *proj functions, but they're included just -// in case. The only tex*D texture sampling functions omitted are: -// - tex*Dcmpbias -// - tex*Dcmplod -// - tex*DARRAY* -// - tex*DMS* -// - Variants returning integers -// Standard line length restrictions are ignored below for vertical brevity. -/* -// tex1D: -float4 tex1D_linearize(const sampler1D tex, const float tex_coords) -{ return decode_input(tex1D(tex, tex_coords)); } - -float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords) -{ return decode_input(tex1D(tex, tex_coords)); } - -float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const int texel_off) -{ return decode_input(tex1D(tex, tex_coords, texel_off)); } - -float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off) -{ return decode_input(tex1D(tex, tex_coords, texel_off)); } - -float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy) -{ return decode_input(tex1D(tex, tex_coords, dx, dy)); } - -float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy) -{ return decode_input(tex1D(tex, tex_coords, dx, dy)); } - -float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy, const int texel_off) -{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); } - -float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy, const int texel_off) -{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); } - -// tex1Dbias: -float4 
tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords) -{ return decode_input(tex1Dbias(tex, tex_coords)); } - -float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex1Dbias(tex, tex_coords, texel_off)); } - -// tex1Dfetch: -float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords) -{ return decode_input(tex1Dfetch(tex, tex_coords)); } - -float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords, const int texel_off) -{ return decode_input(tex1Dfetch(tex, tex_coords, texel_off)); } - -// tex1Dlod: -float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords) -{ return decode_input(tex1Dlod(tex, tex_coords)); } - -float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex1Dlod(tex, tex_coords, texel_off)); } - -// tex1Dproj: -float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords) -{ return decode_input(tex1Dproj(tex, tex_coords)); } - -float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords) -{ return decode_input(tex1Dproj(tex, tex_coords)); } - -float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off) -{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); } - -float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords, const int texel_off) -{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); } -*/ -// tex2D: -float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords) -{ return decode_input(tex2D(tex, tex_coords)); } - -float4 tex2D_linearize_first(const sampler2D tex, const float2 tex_coords) -{ return decode_input_first(tex2D(tex, tex_coords)); } - -float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords) -{ return decode_input(tex2D(tex, tex_coords.xy)); } - -//float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off) -//{ return 
decode_input(tex2D(tex, tex_coords, texel_off)); } - -//float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off) -//{ return decode_input(tex2D(tex, tex_coords.xy, texel_off)); } -/* -float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy) -{ return decode_input(tex2D(tex, tex_coords, dx, dy)); } - -float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy) -{ return decode_input(tex2D(tex, tex_coords, dx, dy)); } - -float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off) -{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); } - -float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off) -{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); } - -// tex2Dbias: -float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords) -{ return decode_input(tex2Dbias(tex, tex_coords)); } - -float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex2Dbias(tex, tex_coords, texel_off)); } - -// tex2Dfetch: -float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords) -{ return decode_input(tex2Dfetch(tex, tex_coords)); } - -float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords, const int texel_off) -{ return decode_input(tex2Dfetch(tex, tex_coords, texel_off)); } -*/ -// tex2Dlod: -float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords) -{ return decode_input(tex2Dlod(tex, tex_coords)); } - -//float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off) -//{ return decode_input(tex2Dlod(tex, tex_coords, texel_off)); } -/* -// tex2Dproj: -float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords) -{ return decode_input(tex2Dproj(tex, 
tex_coords)); } - -float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords) -{ return decode_input(tex2Dproj(tex, tex_coords)); } - -float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off) -{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); } - -float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); } - -// tex3D: -float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords) -{ return decode_input(tex3D(tex, tex_coords)); } - -float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const int texel_off) -{ return decode_input(tex3D(tex, tex_coords, texel_off)); } - -float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy) -{ return decode_input(tex3D(tex, tex_coords, dx, dy)); } - -float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy, const int texel_off) -{ return decode_input(tex3D(tex, tex_coords, dx, dy, texel_off)); } - -// tex3Dbias: -float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords) -{ return decode_input(tex3Dbias(tex, tex_coords)); } - -float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex3Dbias(tex, tex_coords, texel_off)); } - -// tex3Dfetch: -float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords) -{ return decode_input(tex3Dfetch(tex, tex_coords)); } - -float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords, const int texel_off) -{ return decode_input(tex3Dfetch(tex, tex_coords, texel_off)); } - -// tex3Dlod: -float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords) -{ return decode_input(tex3Dlod(tex, tex_coords)); } - -float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off) -{ return 
decode_input(tex3Dlod(tex, tex_coords, texel_off)); } - -// tex3Dproj: -float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords) -{ return decode_input(tex3Dproj(tex, tex_coords)); } - -float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off) -{ return decode_input(tex3Dproj(tex, tex_coords, texel_off)); } - - -// NONSTANDARD "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS: -// This narrow selection of nonstandard tex2D* functions can be useful: - -// tex2Dlod0: Automatically fill in the tex2D LOD parameter for mip level 0. -float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords) -{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0))); } - -float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off) -{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0), texel_off)); } - - -// MANUALLY LINEARIZING TEXTURE LOOKUP FUNCTIONS: -// Provide a narrower selection of tex2D* wrapper functions that decode an -// input sample with a specified gamma value. These are useful for reading -// LUT's and for reading the input of pass0 in a later pass. 
- -// tex2D: -float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); } - -float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); } - -// tex2Dbias: -float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma) -{ return decode_gamma_input(tex2Dbias(tex, tex_coords), gamma); } - -float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2Dbias(tex, tex_coords, texel_off), gamma); } - -// tex2Dfetch: 
-float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const float3 gamma) -{ return decode_gamma_input(tex2Dfetch(tex, tex_coords), gamma); } - -float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const int texel_off, const float3 gamma) -{ return decode_gamma_input(tex2Dfetch(tex, tex_coords, texel_off), gamma); } -*/ -// tex2Dlod: -float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma) -{ return decode_gamma_input(tex2Dlod(tex, tex_coords), gamma); } - -//float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma) -//{ return decode_gamma_input(tex2Dlod(tex, tex_coords, texel_off), gamma); } - - -#endif // GAMMA_MANAGEMENT_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/helper-functions-and-macros.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/helper-functions-and-macros.fxh deleted file mode 100644 index d9e1820df..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/helper-functions-and-macros.fxh +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _HELPER_FUNCTIONS_AND_MACROS_H -#define _HELPER_FUNCTIONS_AND_MACROS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2020 Alex Gunter -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -float4 tex2D_nograd(sampler2D tex, float2 tex_coords) -{ - return tex2Dlod(tex, float4(tex_coords, 0, 0), 0.0); -} - -// ReShade 4 does not permit the use of functions or the ternary operator -// outside of a function definition. This is a problem for this port -// because the original crt-royale shader makes heavy use of these -// constructs at the root level. - -// These preprocessor definitions are a workaround for this limitation. -// Note that they are strictly intended for defining complex global -// constants. I doubt they're more performant than the built-in -// equivalents, so I recommend using the built-ins whenever you can. 
- - -#define macro_sign(c) -((int) ((c) != 0)) * -((int) ((c) > 0)) -#define macro_abs(c) (c) * macro_sign(c) - -#define macro_min(c, d) (c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d))) -#define macro_max(c, d) (c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d))) -#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u) - -#define macro_ceil(c) (float) ((int) (c) + (int) (((int) (c)) < (c))) - -#define macro_cond(c, a, b) float(c) * (a) + float(!(c)) * (b) - - - -//////////////////////// COMMON MATHEMATICAL CONSTANTS /////////////////////// - -static const float pi = 3.141592653589; -// We often want to find the location of the previous texel, e.g.: -// const float2 curr_texel = uv * texture_size; -// const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5); -// const float2 prev_texel_uv = prev_texel / texture_size; -// However, many GPU drivers round incorrectly around exact texel locations. -// We need to subtract a little less than 0.5 before flooring, and some GPU's -// require this value to be farther from 0.5 than others; define it here. 
-// const float2 prev_texel = -// floor(curr_texel - float2(under_half)) + float2(0.5); -static const float under_half = 0.4995; - -// Avoid dividing by zero; using a macro overloads for float, float2, etc.: -#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625)) // 2^-16 - -// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0))) -#define fmod(x, y) (frac((x) / (y)) * (y)) - -#endif // _HELPER_FUNCTIONS_AND_MACROS_H \ No newline at end of file diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/phosphor-mask-resizing.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/phosphor-mask-resizing.fxh deleted file mode 100644 index 9d7243bd3..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/phosphor-mask-resizing.fxh +++ /dev/null @@ -1,676 +0,0 @@ -#ifndef PHOSPHOR_MASK_RESIZING_H -#define PHOSPHOR_MASK_RESIZING_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" - -///////////////////////////// CODEPATH SELECTION ///////////////////////////// - -// Choose a looping strategy based on what's allowed: -// Dynamic loops not allowed: Use a flat static loop. -// Dynamic loops accomodated: Coarsely branch around static loops. -// Dynamic loops assumed allowed: Use a flat dynamic loop. -#ifndef DRIVERS_ALLOW_DYNAMIC_BRANCHES - #ifdef ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS - #define BREAK_LOOPS_INTO_PIECES - #else - #define USE_SINGLE_STATIC_LOOP - #endif -#endif // No else needed: Dynamic loops assumed. - - -////////////////////////////////// CONSTANTS ///////////////////////////////// - -// The larger the resized tile, the fewer samples we'll need for downsizing. -// See if we can get a static min tile size > mask_min_allowed_tile_size: -static const float mask_min_allowed_tile_size = macro_ceil( - mask_min_allowed_triad_size * mask_triads_per_tile); -static const float mask_min_expected_tile_size = - mask_min_allowed_tile_size; -// Limit the number of sinc resize taps by the maximum minification factor: -static const float pi_over_lobes = pi/mask_sinc_lobes; -static const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes * - mask_resize_src_lut_size.x/mask_min_expected_tile_size; -// Vectorized loops sample in multiples of 4. 
Round up to be safe: -static const float max_sinc_resize_samples_m4 = macro_ceil( - max_sinc_resize_samples_float * 0.25) * 4.0; - - -///////////////////////// RESAMPLING FUNCTION HELPERS //////////////////////// - -float get_dynamic_loop_size(const float magnification_scale) -{ - // Requires: The following global constants must be defined: - // 1.) mask_sinc_lobes - // 2.) max_sinc_resize_samples_m4 - // Returns: The minimum number of texture samples for a correct downsize - // at magnification_scale. - // We're downsizing, so the filter is sized across 2*lobes output pixels - // (not 2*lobes input texels). This impacts distance measurements and the - // minimum number of input samples needed. - const float min_samples_float = 2.0 * mask_sinc_lobes / magnification_scale; - const float min_samples_m4 = ceil(min_samples_float * 0.25) * 4.0; - #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES - const float max_samples_m4 = max_sinc_resize_samples_m4; - #else // ifdef BREAK_LOOPS_INTO_PIECES - // Simulating loops with branches imposes a 128-sample limit. - const float max_samples_m4 = min(128.0, max_sinc_resize_samples_m4); - #endif - return min(min_samples_m4, max_samples_m4); -} - -float2 get_first_texel_tile_uv_and_dist(const float2 tex_uv, - const float2 texture_size, const float dr, - const float input_tiles_per_texture_r, const float samples, - const bool vertical) -{ - // Requires: 1.) dr == du == 1.0/texture_size.x or - // dr == dv == 1.0/texture_size.y - // (whichever direction we're resampling in). - // It's a scalar to save register space. - // 2.) input_tiles_per_texture_r is the number of input tiles - // that can fit in the input texture in the direction we're - // resampling this pass. - // 3.) vertical indicates whether we're resampling vertically - // this pass (or horizontally). - // Returns: Pack and return the first sample's tile_uv coord in [0, 1] - // and its texel distance from the destination pixel, in the - // resized dimension only. 
- // We'll start with the topmost or leftmost sample and work down or right, - // so get the first sample location and distance. Modify both dimensions - // as if we're doing a one-pass 2D resize; we'll throw away the unneeded - // (and incorrect) dimension at the end. - const float2 curr_texel = tex_uv * texture_size; - const float2 prev_texel = floor(curr_texel - under_half.xx) + 0.5.xx; - const float2 first_texel = prev_texel - float2(samples.xx/2.0.xx - 1.0.xx); - const float2 first_texel_uv_wrap_2D = first_texel * dr; - const float2 first_texel_dist_2D = curr_texel - first_texel; - // Convert from tex_uv to tile_uv coords so we can sub fracs for fmods. - const float2 first_texel_tile_uv_wrap_2D = - first_texel_uv_wrap_2D * input_tiles_per_texture_r; - // Project wrapped coordinates to the [0, 1] range. We'll do this with all - // samples,but the first texel is special, since it might be negative. - const float2 coord_negative = - float2(first_texel_tile_uv_wrap_2D < 0.0.xx); - const float2 first_texel_tile_uv_2D = - frac(first_texel_tile_uv_wrap_2D) + coord_negative; - // Pack the first texel's tile_uv coord and texel distance in 1D: - const float2 tile_u_and_dist = - float2(first_texel_tile_uv_2D.x, first_texel_dist_2D.x); - const float2 tile_v_and_dist = - float2(first_texel_tile_uv_2D.y, first_texel_dist_2D.y); - return vertical ? tile_v_and_dist : tile_u_and_dist; - //return lerp(tile_u_and_dist, tile_v_and_dist, float(vertical)); -} - -float4 tex2Dlod0try(const sampler2D tex, const float2 tex_uv) -{ - // Mipmapping and anisotropic filtering get confused by sinc-resampling. 
- // One [slow] workaround is to select the lowest mip level: - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD - return tex2Dlod(tex, float4(tex_uv, 0.0, 0.0)); - #else - #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS - return tex2Dbias(tex, float4(tex_uv, 0.0, -16.0)); - #else - return tex2D(tex, tex_uv); - #endif - #endif -} - - -////////////////////////////// LOOP BODY MACROS ////////////////////////////// - -// Using functions can exceed the temporary register limit, so we're -// stuck with #define macros (I'm TRULY sorry). They're declared here instead -// of above to be closer to the actual invocation sites. Steps: -// 1.) Get the exact texel location. -// 2.) Sample the phosphor mask (already assumed encoded in linear RGB). -// 3.) Get the distance from the current pixel and sinc weight: -// sinc(dist) = sin(pi * dist)/(pi * dist) -// We can also use the slower/smoother Lanczos instead: -// L(x) = sinc(dist) * sinc(dist / lobes) -// 4.) Accumulate the weight sum in weights, and accumulate the weighted texels -// in pixel_color (we'll normalize outside the loop at the end). -// We vectorize the loop to help reduce the Lanczos window's cost. - - // The r coord is the coord in the dimension we're resizing along (u or v), - // and first_texel_tile_uv_rrrr is a float4 of the first texel's u or v - // tile_uv coord in [0, 1]. tex_uv_r will contain the tile_uv u or v coord - // for four new texel samples. 
- #define CALCULATE_R_COORD_FOR_4_SAMPLES \ - const float4 true_i = float4(i_base + i,i_base + i,i_base + i,i_base + i) + float4(0.0, 1.0, 2.0, 3.0); \ - const float4 tile_uv_r = frac( \ - first_texel_tile_uv_rrrr + true_i * tile_dr); \ - const float4 tex_uv_r = tile_uv_r * tile_size_uv_r; - - #ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW - #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ - const float4 pi_dist_over_lobes = pi_over_lobes * dist; \ - const float4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\ - (pi_dist*pi_dist_over_lobes), 1.0.xxxx); - #else - #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ - const float4 weights = min(sin(pi_dist)/pi_dist, 1.0.xxxx); - #endif - - #define UPDATE_COLOR_AND_WEIGHT_SUMS \ - const float4 dist = magnification_scale * \ - abs(first_dist_unscaled - true_i); \ - const float4 pi_dist = pi * dist; \ - CALCULATE_SINC_RESAMPLE_WEIGHTS; \ - pixel_color += new_sample0 * weights.xxx; \ - pixel_color += new_sample1 * weights.yyy; \ - pixel_color += new_sample2 * weights.zzz; \ - pixel_color += new_sample3 * weights.www; \ - weight_sum += weights; - - #define VERTICAL_SINC_RESAMPLE_LOOP_BODY \ - CALCULATE_R_COORD_FOR_4_SAMPLES; \ - const float3 new_sample0 = tex2Dlod0try(tex, \ - float2(tex_uv.x, tex_uv_r.x)).rgb; \ - const float3 new_sample1 = tex2Dlod0try(tex, \ - float2(tex_uv.x, tex_uv_r.y)).rgb; \ - const float3 new_sample2 = tex2Dlod0try(tex, \ - float2(tex_uv.x, tex_uv_r.z)).rgb; \ - const float3 new_sample3 = tex2Dlod0try(tex, \ - float2(tex_uv.x, tex_uv_r.w)).rgb; \ - UPDATE_COLOR_AND_WEIGHT_SUMS; - - #define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \ - CALCULATE_R_COORD_FOR_4_SAMPLES; \ - const float3 new_sample0 = tex2Dlod0try(tex, \ - float2(tex_uv_r.x, tex_uv.y)).rgb; \ - const float3 new_sample1 = tex2Dlod0try(tex, \ - float2(tex_uv_r.y, tex_uv.y)).rgb; \ - const float3 new_sample2 = tex2Dlod0try(tex, \ - float2(tex_uv_r.z, tex_uv.y)).rgb; \ - const float3 new_sample3 = tex2Dlod0try(tex, \ - float2(tex_uv_r.w, tex_uv.y)).rgb; \ - 
UPDATE_COLOR_AND_WEIGHT_SUMS; - - -//////////////////////////// RESAMPLING FUNCTIONS //////////////////////////// - -float3 downsample_vertical_sinc_tiled(const sampler2D tex, - const float2 tex_uv, const float2 texture_size, const float dr, - const float magnification_scale, const float tile_size_uv_r) -{ - // Requires: 1.) dr == du == 1.0/texture_size.x or - // dr == dv == 1.0/texture_size.y - // (whichever direction we're resampling in). - // It's a scalar to save register space. - // 2.) tile_size_uv_r is the number of texels an input tile - // takes up in the input texture, in the direction we're - // resampling this pass. - // 3.) magnification_scale must be <= 1.0. - // Returns: Return a [Lanczos] sinc-resampled pixel of a vertically - // downsized input tile embedded in an input texture. (The - // vertical version is special-cased though: It assumes the - // tile size equals the [static] texture size, since it's used - // on an LUT texture input containing one tile. For more - // generic use, eliminate the "static" in the parameters.) - // The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension - // we're resizing along, e.g. "dy" in this case. - #ifdef USE_SINGLE_STATIC_LOOP - // A static loop can be faster, but it might blur too much from using - // more samples than it should. 
- static const int samples = int(max_sinc_resize_samples_m4); - #else - const int samples = int(get_dynamic_loop_size(magnification_scale)); - #endif - - // Get the first sample location (scalar tile uv coord along the resized - // dimension) and distance from the output location (in texels): - static const float input_tiles_per_texture_r = 1.0/tile_size_uv_r; - // true = vertical resize: - const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist( - tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true); - const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx; - const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy; - // Get the tile sample offset: - static const float tile_dr = dr * input_tiles_per_texture_r; - - // Sum up each weight and weighted sample color, varying the looping - // strategy based on our expected dynamic loop capabilities. See the - // loop body macros above. - int i_base = 0; - float4 weight_sum = 0.0.xxxx; - float3 pixel_color = 0.0.xxx; - static const int i_step = 4; - #ifdef BREAK_LOOPS_INTO_PIECES - if(samples - i_base >= 64) - { - for(int i = 0; i < 64; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 64; - } - if(samples - i_base >= 32) - { - for(int i = 0; i < 32; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 32; - } - if(samples - i_base >= 16) - { - for(int i = 0; i < 16; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 16; - } - if(samples - i_base >= 8) - { - for(int i = 0; i < 8; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 8; - } - if(samples - i_base >= 4) - { - for(int i = 0; i < 4; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 4; - } - // Do another 4-sample block for a total of 128 max samples. 
- if(samples - i_base > 0) - { - for(int i = 0; i < 4; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - } - #else - for(int i = 0; i < samples; i += i_step) - { - VERTICAL_SINC_RESAMPLE_LOOP_BODY; - } - #endif - // Normalize so the weight_sum == 1.0, and return: - const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw; - const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx + - weight_sum_reduce.yyy); - return (pixel_color/scalar_weight_sum); -} - -float3 downsample_horizontal_sinc_tiled(const sampler2D tex, - const float2 tex_uv, const float2 texture_size, const float dr, - const float magnification_scale, const float tile_size_uv_r) -{ - // Differences from downsample_horizontal_sinc_tiled: - // 1.) The dr and tile_size_uv_r parameters are not static consts. - // 2.) The "vertical" parameter to get_first_texel_tile_uv_and_dist is - // set to false instead of true. - // 3.) The horizontal version of the loop body is used. - // TODO: If we can get guaranteed compile-time dead code elimination, - // we can combine the vertical/horizontal downsampling functions by: - // 1.) Add an extra static const bool parameter called "vertical." - // 2.) Supply it with the result of get_first_texel_tile_uv_and_dist(). - // 3.) Use a conditional assignment in the loop body macro. This is the - // tricky part: We DO NOT want to incur the extra conditional - // assignment in the inner loop at runtime! - // The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension - // we're resizing along, e.g. "dx" in this case. - #ifdef USE_SINGLE_STATIC_LOOP - // If we have to load all samples, we might as well use them. 
- static const int samples = int(max_sinc_resize_samples_m4); - #else - const int samples = int(get_dynamic_loop_size(magnification_scale)); - #endif - - // Get the first sample location (scalar tile uv coord along resized - // dimension) and distance from the output location (in texels): - const float input_tiles_per_texture_r = 1.0/tile_size_uv_r; - // false = horizontal resize: - const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist( - tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, false); - const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx; - const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy; - // Get the tile sample offset: - const float tile_dr = dr * input_tiles_per_texture_r; - - // Sum up each weight and weighted sample color, varying the looping - // strategy based on our expected dynamic loop capabilities. See the - // loop body macros above. - int i_base = 0; - float4 weight_sum = 0.0.xxxx; - float3 pixel_color = 0.0.xxx; - static const int i_step = 4; - #ifdef BREAK_LOOPS_INTO_PIECES - if(samples - i_base >= 64) - { - for(int i = 0; i < 64; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 64; - } - if(samples - i_base >= 32) - { - for(int i = 0; i < 32; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 32; - } - if(samples - i_base >= 16) - { - for(int i = 0; i < 16; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 16; - } - if(samples - i_base >= 8) - { - for(int i = 0; i < 8; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 8; - } - if(samples - i_base >= 4) - { - for(int i = 0; i < 4; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - i_base += 4; - } - // Do another 4-sample block for a total of 128 max samples. 
- if(samples - i_base > 0) - { - for(int i = 0; i < 4; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - } - #else - for(int i = 0; i < samples; i += i_step) - { - HORIZONTAL_SINC_RESAMPLE_LOOP_BODY; - } - #endif - // Normalize so the weight_sum == 1.0, and return: - const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw; - const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx + - weight_sum_reduce.yyy); - return (pixel_color/scalar_weight_sum); -} - - -//////////////////////////// TILE SIZE CALCULATION /////////////////////////// - -float2 get_resized_mask_tile_size(const float2 estimated_viewport_size, - const float2 estimated_mask_resize_output_size, - const bool solemnly_swear_same_inputs_for_every_pass) -{ - // Requires: The following global constants must be defined according to - // certain constraints: - // 1.) mask_resize_num_triads: Must be high enough that our - // mask sampling method won't have artifacts later - // (long story; see derived-settings-and-constants.h) - // 2.) mask_resize_src_lut_size: Texel size of our mask LUT - // 3.) mask_triads_per_tile: Num horizontal triads in our LUT - // 4.) mask_min_allowed_triad_size: User setting (the more - // restrictive it is, the faster the resize will go) - // 5.) mask_min_allowed_tile_size_x < mask_resize_src_lut_size.x - // 6.) mask_triad_size_desired_{runtime, static} - // 7.) mask_num_triads_desired_{runtime, static} - // 8.) mask_specify_num_triads must be 0.0/1.0 (false/true) - // The function parameters must be defined as follows: - // 1.) estimated_viewport_size == (final viewport size); - // If mask_specify_num_triads is 1.0/true and the viewport - // estimate is wrong, the number of triads will differ from - // the user's preference by about the same factor. - // 2.) estimated_mask_resize_output_size: Must equal the - // output size of the MASK_RESIZE pass. - // Exception: The x component may be estimated garbage if - // and only if the caller throws away the x result. 
- // 3.) solemnly_swear_same_inputs_for_every_pass: Set to false, - // unless you can guarantee that every call across every - // pass will use the same sizes for the other parameters. - // When calling this across multiple passes, always use the - // same y viewport size/scale, and always use the same x - // viewport size/scale when using the x result. - // Returns: Return the final size of a manually resized mask tile, after - // constraining the desired size to avoid artifacts. Under - // unusual circumstances, tiles may become stretched vertically - // (see wall of text below). - // Stated tile properties must be correct: - static const float tile_aspect_ratio_inv = - mask_resize_src_lut_size.y/mask_resize_src_lut_size.x; - static const float tile_aspect_ratio = 1.0/tile_aspect_ratio_inv; - static const float2 tile_aspect = float2(1.0, tile_aspect_ratio_inv); - // If mask_specify_num_triads is 1.0/true and estimated_viewport_size.x is - // wrong, the user preference will be misinterpreted: - const float desired_tile_size_x = mask_triads_per_tile * lerp( - mask_triad_size_desired, - estimated_viewport_size.x / mask_num_triads_desired, - mask_specify_num_triads); - if(get_mask_sample_mode() > 0.5) - { - // We don't need constraints unless we're sampling MASK_RESIZE. - return desired_tile_size_x * tile_aspect; - } - // Make sure we're not upsizing: - const float temp_tile_size_x = - min(desired_tile_size_x, mask_resize_src_lut_size.x); - // Enforce min_tile_size and max_tile_size in both dimensions: - const float2 temp_tile_size = temp_tile_size_x * tile_aspect; - static const float2 min_tile_size = - mask_min_allowed_tile_size * tile_aspect; - const float2 max_tile_size = - estimated_mask_resize_output_size / mask_resize_num_tiles; - const float2 clamped_tile_size = - clamp(temp_tile_size, min_tile_size, max_tile_size); - // Try to maintain tile_aspect_ratio. 
This is the tricky part: - // If we're currently resizing in the y dimension, the x components - // could be MEANINGLESS. (If estimated_mask_resize_output_size.x is - // bogus, then so is max_tile_size.x and clamped_tile_size.x.) - // We can't adjust the y size based on clamped_tile_size.x. If it - // clamps when it shouldn't, it won't clamp again when later passes - // call this function with the correct sizes, and the discrepancy will - // break the sampling coords in MASKED_SCANLINES. Instead, we'll limit - // the x size based on the y size, but not vice versa, unless the - // caller swears the parameters were the same (correct) in every pass. - // As a result, triads could appear vertically stretched if: - // a.) mask_resize_src_lut_size.x > mask_resize_src_lut_size.y: Wide - // LUT's might clamp x more than y (all provided LUT's are square) - // b.) true_viewport_size.x < true_viewport_size.y: The user is playing - // with a vertically oriented screen (not accounted for anyway) - // c.) mask_resize_viewport_scale.x < masked_resize_viewport_scale.y: - // Viewport scales are equal by default. - // If any of these are the case, you can fix the stretching by setting: - // mask_resize_viewport_scale.x = mask_resize_viewport_scale.y * - // (1.0 / min_expected_aspect_ratio) * - // (mask_resize_src_lut_size.x / mask_resize_src_lut_size.y) - const float x_tile_size_from_y = - clamped_tile_size.y * tile_aspect_ratio; - const float y_tile_size_from_x = lerp(clamped_tile_size.y, - clamped_tile_size.x * tile_aspect_ratio_inv, - float(solemnly_swear_same_inputs_for_every_pass)); - const float2 reclamped_tile_size = float2( - min(clamped_tile_size.x, x_tile_size_from_y), - min(clamped_tile_size.y, y_tile_size_from_x)); - // We need integer tile sizes in both directions for tiled sampling to - // work correctly. 
Use floor (to make sure we don't round up), but be - // careful to avoid a rounding bug where floor decreases whole numbers: - const float2 final_resized_tile_size = - floor(reclamped_tile_size + float2(FIX_ZERO(0.0),FIX_ZERO(0.0))); - return final_resized_tile_size; -} - - -///////////////////////// FINAL MASK SAMPLING HELPERS //////////////////////// - -float4 get_mask_sampling_parameters(const float2 mask_resize_texture_size, - const float2 mask_resize_video_size, const float2 true_viewport_size, - out float2 mask_tiles_per_screen) -{ - // Requires: 1.) Requirements of get_resized_mask_tile_size() must be - // met, particularly regarding global constants. - // The function parameters must be defined as follows: - // 1.) mask_resize_texture_size == MASK_RESIZE.texture_size - // if get_mask_sample_mode() is 0 (otherwise anything) - // 2.) mask_resize_video_size == MASK_RESIZE.video_size - // if get_mask_sample_mode() is 0 (otherwise anything) - // 3.) true_viewport_size == IN.output_size for a pass set to - // 1.0 viewport scale (i.e. it must be correct) - // Returns: Return a float4 containing: - // xy: tex_uv coords for the start of the mask tile - // zw: tex_uv size of the mask tile from start to end - // mask_tiles_per_screen is an out parameter containing the - // number of mask tiles that will fit on the screen. - // First get the final resized tile size. The viewport size and mask - // resize viewport scale must be correct, but don't solemnly swear they - // were correct in both mask resize passes unless you know it's true. - // (We can better ensure a correct tile aspect ratio if the parameters are - // guaranteed correct in all passes...but if we lie, we'll get inconsistent - // sizes across passes, resulting in broken texture coordinates.) 
- const float mask_sample_mode = get_mask_sample_mode(); - const float2 mask_resize_tile_size = get_resized_mask_tile_size( - true_viewport_size, mask_resize_video_size, false); - if(mask_sample_mode < 0.5) - { - // Sample MASK_RESIZE: The resized tile is a fraction of the texture - // size and starts at a nonzero offset to allow for border texels: - const float2 mask_tile_uv_size = mask_resize_tile_size / - mask_resize_texture_size; - const float2 skipped_tiles = mask_start_texels/mask_resize_tile_size; - const float2 mask_tile_start_uv = skipped_tiles * mask_tile_uv_size; - // mask_tiles_per_screen must be based on the *true* viewport size: - mask_tiles_per_screen = true_viewport_size / mask_resize_tile_size; - return float4(mask_tile_start_uv, mask_tile_uv_size); - } - else - { - // If we're tiling at the original size (1:1 pixel:texel), redefine a - // "tile" to be the full texture containing many triads. Otherwise, - // we're hardware-resampling an LUT, and the texture truly contains a - // single unresized phosphor mask tile anyway. - static const float2 mask_tile_uv_size = 1.0.xx; - static const float2 mask_tile_start_uv = 0.0.xx; - if(mask_sample_mode > 1.5) - { - // Repeat the full LUT at a 1:1 pixel:texel ratio without resizing: - mask_tiles_per_screen = true_viewport_size/mask_texture_large_size; - } - else - { - // Hardware-resize the original LUT: - mask_tiles_per_screen = true_viewport_size / mask_resize_tile_size; - } - return float4(mask_tile_start_uv, mask_tile_uv_size); - } -} - -float2 fix_tiling_discontinuities_normalized(const float2 tile_uv, - float2 duv_dx, float2 duv_dy) -{ - // Requires: 1.) duv_dx == ddx(tile_uv) - // 2.) duv_dy == ddy(tile_uv) - // 3.) tile_uv contains tile-relative uv coords in [0, 1], - // such that (0.5, 0.5) is the center of a tile, etc. - // ("Tile" can mean texture, the video embedded in the - // texture, or some other "tile" embedded in a texture.) 
- // Returns: Return new tile_uv coords that contain no discontinuities - // across a 2x2 pixel quad. - // Description: - // When uv coords wrap from 1.0 to 0.0, they create a discontinuity in the - // derivatives, which we assume happened if the absolute difference between - // any fragment in a 2x2 block is > ~half a tile. If the current block has - // a u or v discontinuity and the current fragment is in the first half of - // the tile along that axis (i.e. it wrapped from 1.0 to 0.0), add a tile - // to that coord to make the 2x2 block continuous. (It will now have a - // coord > 1.0 in the padding area beyond the tile.) This function takes - // derivatives as parameters so the caller can reuse them. - // In case we're using high-quality (nVidia-style) derivatives, ensure - // diagonically opposite fragments see each other for correctness: - duv_dx = abs(duv_dx) + abs(ddy(duv_dx)); - duv_dy = abs(duv_dy) + abs(ddx(duv_dy)); - const float2 pixel_in_first_half_tile = float2(tile_uv < 0.5.xx); - const float2 jump_exists = float2(duv_dx + duv_dy > 0.5.xx); - return tile_uv + jump_exists * pixel_in_first_half_tile; -} - -float2 convert_phosphor_tile_uv_wrap_to_tex_uv(const float2 tile_uv_wrap, - const float4 mask_tile_start_uv_and_size) -{ - // Requires: 1.) tile_uv_wrap contains tile-relative uv coords, where the - // tile spans from [0, 1], such that (0.5, 0.5) is at the - // tile center. The input coords can range from [0, inf], - // and their fractional parts map to a repeated tile. - // ("Tile" can mean texture, the video embedded in the - // texture, or some other "tile" embedded in a texture.) - // 2.) mask_tile_start_uv_and_size.xy contains tex_uv coords - // for the start of the embedded tile in the full texture. - // 3.) mask_tile_start_uv_and_size.zw contains the [fractional] - // tex_uv size of the embedded tile in the full texture. - // Returns: Return tex_uv coords (used for texture sampling) - // corresponding to tile_uv_wrap. 
- if(get_mask_sample_mode() < 0.5) - { - // Manually repeat the resized mask tile to fill the screen: - // First get fractional tile_uv coords. Using frac/fmod on coords - // confuses anisotropic filtering; fix it as user options dictate. - // derived-settings-and-constants.h disables incompatible options. - #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE - float2 tile_uv = frac(tile_uv_wrap * 0.5) * 2.0; - #else - float2 tile_uv = frac(tile_uv_wrap); - #endif - #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES - const float2 tile_uv_dx = ddx(tile_uv); - const float2 tile_uv_dy = ddy(tile_uv); - tile_uv = fix_tiling_discontinuities_normalized(tile_uv, - tile_uv_dx, tile_uv_dy); - #endif - // The tile is embedded in a padded FBO, and it may start at a - // nonzero offset if border texels are used to avoid artifacts: - const float2 mask_tex_uv = mask_tile_start_uv_and_size.xy + - tile_uv * mask_tile_start_uv_and_size.zw; - return mask_tex_uv; - } - else - { - // Sample from the input phosphor mask texture with hardware tiling. - // If we're tiling at the original size (mode 2), the "tile" is the - // whole texture, and it contains a large number of triads mapped with - // a 1:1 pixel:texel ratio. OTHERWISE, the texture contains a single - // unresized tile. tile_uv_wrap already has correct coords for both! 
- return tile_uv_wrap; - } -} - - -#endif // PHOSPHOR_MASK_RESIZING_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/quad-pixel-communication.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/quad-pixel-communication.fxh deleted file mode 100644 index 591c74c10..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/quad-pixel-communication.fxh +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef QUAD_PIXEL_COMMUNICATION_H -#define QUAD_PIXEL_COMMUNICATION_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey* -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -///////////////////////////////// DISCLAIMER ///////////////////////////////// - -// *This code was inspired by "Shader Amortization using Pixel Quad Message -// Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2. 
My intent -// is not to plagiarize his fundamentally similar code and assert my own -// copyright, but the algorithmic helper functions require so little code that -// implementations can't vary by much except bugfixes and conventions. I just -// wanted to license my own particular code here to avoid ambiguity and make it -// clear that as far as I'm concerned, people can do as they please with it. - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// Given screen pixel numbers, derive a "quad vector" describing a fragment's -// position in its 2x2 pixel quad. Given that vector, obtain the values of any -// variable at neighboring fragments. -// Requires: Using this file in general requires: -// 1.) ddx() and ddy() are present in the current Cg profile. -// 2.) The GPU driver is using fine/high-quality derivatives. -// Functions will give incorrect results if this is not true, -// so a test function is included. - - -///////////////////// QUAD-PIXEL COMMUNICATION PRIMITIVES //////////////////// - -float4 get_quad_vector_naive(const float4 output_pixel_num_wrt_uvxy) -{ - // Requires: Two measures of the current fragment's output pixel number - // in the range ([0, IN.output_size.x), [0, IN.output_size.y)): - // 1.) output_pixel_num_wrt_uvxy.xy increase with uv coords. - // 2.) output_pixel_num_wrt_uvxy.zw increase with screen xy. - // Returns: Two measures of the fragment's position in its 2x2 quad: - // 1.) The .xy components are its 2x2 placement with respect to - // uv direction (the origin (0, 0) is at the top-left): - // top-left = (-1.0, -1.0) top-right = ( 1.0, -1.0) - // bottom-left = (-1.0, 1.0) bottom-right = ( 1.0, 1.0) - // You need this to arrange/weight shared texture samples. - // 2.) The .zw components are its 2x2 placement with respect to - // screen xy direction (IN.position); the origin varies. - // quad_gather needs this measure to work correctly. 
- // Note: quad_vector.zw = quad_vector.xy * float2( - // ddx(output_pixel_num_wrt_uvxy.x), - // ddy(output_pixel_num_wrt_uvxy.y)); - // Caveats: This function assumes the GPU driver always starts 2x2 pixel - // quads at even pixel numbers. This assumption can be wrong - // for odd output resolutions (nondeterministically so). - const float4 pixel_odd = frac(output_pixel_num_wrt_uvxy * 0.5) * 2.0; - const float4 quad_vector = pixel_odd * 2.0 - 1.0.xxxx; - return quad_vector; -} - -float4 get_quad_vector(const float4 output_pixel_num_wrt_uvxy) -{ - // Requires: Same as get_quad_vector_naive() (see that first). - // Returns: Same as get_quad_vector_naive() (see that first), but it's - // correct even if the 2x2 pixel quad starts at an odd pixel, - // which can occur at odd resolutions. - const float4 quad_vector_guess = - get_quad_vector_naive(output_pixel_num_wrt_uvxy); - // If quad_vector_guess.zw doesn't increase with screen xy, we know - // the 2x2 pixel quad starts at an odd pixel: - const float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_guess.z), - ddy(quad_vector_guess.w)); - return quad_vector_guess * odd_start_mirror.xyxy; -} - -float4 get_quad_vector(const float2 output_pixel_num_wrt_uv) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) output_pixel_num_wrt_uv must increase with uv coords and - // measure the current fragment's output pixel number in: - // ([0, IN.output_size.x), [0, IN.output_size.y)) - // Returns: Same as get_quad_vector_naive() (see that first), but it's - // correct even if the 2x2 pixel quad starts at an odd pixel, - // which can occur at odd resolutions. - // Caveats: This function requires less information than the version - // taking a float4, but it's potentially slower. - // Do screen coords increase with or against uv? Get the direction - // with respect to (uv.x, uv.y) for (screen.x, screen.y) in {-1, 1}. 
- const float2 screen_uv_mirror = float2(ddx(output_pixel_num_wrt_uv.x), - ddy(output_pixel_num_wrt_uv.y)); - const float2 pixel_odd_wrt_uv = frac(output_pixel_num_wrt_uv * 0.5) * 2.0; - const float2 quad_vector_uv_guess = (pixel_odd_wrt_uv - 0.5.xx) * 2.0; - const float2 quad_vector_screen_guess = quad_vector_uv_guess * screen_uv_mirror; - // If quad_vector_screen_guess doesn't increase with screen xy, we know - // the 2x2 pixel quad starts at an odd pixel: - const float2 odd_start_mirror = 0.5 * float2(ddx(quad_vector_screen_guess.x), - ddy(quad_vector_screen_guess.y)); - const float4 quad_vector_guess = float4( - quad_vector_uv_guess, quad_vector_screen_guess); - return quad_vector_guess * odd_start_mirror.xyxy; -} - -void quad_gather(const float4 quad_vector, const float4 curr, - out float4 adjx, out float4 adjy, out float4 diag) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) The GPU driver is using fine/high-quality derivatives. - // 3.) quad_vector describes the current fragment's location in - // its 2x2 pixel quad using get_quad_vector()'s conventions. - // 4.) curr is any vector you wish to get neighboring values of. - // Returns: Values of an input vector (curr) at neighboring fragments - // adjacent x, adjacent y, and diagonal (via out parameters). 
- adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -void quad_gather(const float4 quad_vector, const float3 curr, - out float3 adjx, out float3 adjy, out float3 diag) -{ - // Float3 version - adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -void quad_gather(const float4 quad_vector, const float2 curr, - out float2 adjx, out float2 adjy, out float2 diag) -{ - // Float2 version - adjx = curr - ddx(curr) * quad_vector.z; - adjy = curr - ddy(curr) * quad_vector.w; - diag = adjx - ddy(adjx) * quad_vector.w; -} - -float4 quad_gather(const float4 quad_vector, const float curr) -{ - // Float version: - // Returns: return.x == current - // return.y == adjacent x - // return.z == adjacent y - // return.w == diagonal - float4 all = curr.xxxx; - all.y = all.x - ddx(all.x) * quad_vector.z; - all.zw = all.xy - ddy(all.xy) * quad_vector.w; - return all; -} - -float4 quad_gather_sum(const float4 quad_vector, const float4 curr) -{ - // Requires: Same as quad_gather() - // Returns: Sum of an input vector (curr) at all fragments in a quad. 
- float4 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float3 quad_gather_sum(const float4 quad_vector, const float3 curr) -{ - // Float3 version: - float3 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float2 quad_gather_sum(const float4 quad_vector, const float2 curr) -{ - // Float2 version: - float2 adjx, adjy, diag; - quad_gather(quad_vector, curr, adjx, adjy, diag); - return (curr + adjx + adjy + diag); -} - -float quad_gather_sum(const float4 quad_vector, const float curr) -{ - // Float version: - const float4 all_values = quad_gather(quad_vector, curr); - return (all_values.x + all_values.y + all_values.z + all_values.w); -} - -bool fine_derivatives_working(const float4 quad_vector, float4 curr) -{ - // Requires: 1.) ddx() and ddy() are present in the current Cg profile. - // 2.) quad_vector describes the current fragment's location in - // its 2x2 pixel quad using get_quad_vector()'s conventions. - // 3.) curr must be a test vector with non-constant derivatives - // (its value should change nonlinearly across fragments). - // Returns: true if fine/hybrid/high-quality derivatives are used, or - // false if coarse derivatives are used or inconclusive - // Usage: Test whether quad-pixel communication is working! - // Method: We can confirm fine derivatives are used if the following - // holds (ever, for any value at any fragment): - // (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy)) - // The more values we test (e.g. test a float4 two ways), the - // easier it is to demonstrate fine derivatives are working. - // TODO: Check for floating point exact comparison issues! 
- float4 ddx_curr = ddx(curr); - float4 ddy_curr = ddy(curr); - float4 adjx = curr - ddx_curr * quad_vector.z; - float4 adjy = curr - ddy_curr * quad_vector.w; - bool ddy_different = any(ddy_curr != ddy(adjx)); - bool ddx_different = any(ddx_curr != ddx(adjy)); - return any(bool2(ddy_different, ddx_different)); -} - -bool fine_derivatives_working_fast(const float4 quad_vector, float curr) -{ - // Requires: Same as fine_derivatives_working() - // Returns: Same as fine_derivatives_working() - // Usage: This is faster than fine_derivatives_working() but more - // likely to return false negatives, so it's less useful for - // offline testing/debugging. It's also useless as the basis - // for dynamic runtime branching as of May 2014: Derivatives - // (and quad-pixel communication) are currently disallowed in - // branches. However, future GPU's may allow you to use them - // in dynamic branches if you promise the branch condition - // evaluates the same for every fragment in the quad (and/or if - // the driver enforces that promise by making a single fragment - // control branch decisions). If that ever happens, this - // version may become a more economical choice. - float ddx_curr = ddx(curr); - float ddy_curr = ddy(curr); - float adjx = curr - ddx_curr * quad_vector.z; - return (ddy_curr != ddy(adjx)); -} - -#endif // QUAD_PIXEL_COMMUNICATION_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/scanline-functions.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/scanline-functions.fxh deleted file mode 100644 index 27710ce54..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/scanline-functions.fxh +++ /dev/null @@ -1,569 +0,0 @@ -#ifndef SCANLINE_FUNCTIONS_H -#define SCANLINE_FUNCTIONS_H - -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. 
-// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "user-settings.fxh" -#include "derived-settings-and-constants.fxh" -#include "special-functions.fxh" -#include "gamma-management.fxh" - - -///////////////////////////// SCANLINE FUNCTIONS ///////////////////////////// - -float3 get_gaussian_sigma(const float3 color, const float sigma_range) -{ - // Requires: Globals: - // 1.) beam_min_sigma and beam_max_sigma are global floats - // containing the desired minimum and maximum beam standard - // deviations, for dim and bright colors respectively. - // 2.) beam_max_sigma must be > 0.0 - // 3.) beam_min_sigma must be in (0.0, beam_max_sigma] - // 4.) beam_spot_power must be defined as a global float. - // Parameters: - // 1.) color is the underlying source color along a scanline - // 2.) 
sigma_range = beam_max_sigma - beam_min_sigma; we take - // sigma_range as a parameter to avoid repeated computation - // when beam_{min, max}_sigma are runtime shader parameters - // Optional: Users may set beam_spot_shape_function to 1 to define the - // inner f(color) subfunction (see below) as: - // f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0)) - // Otherwise (technically, if beam_spot_shape_function < 0.5): - // f(color) = pow(color, beam_spot_power) - // Returns: The standard deviation of the Gaussian beam for "color:" - // sigma = beam_min_sigma + sigma_range * f(color) - // Details/Discussion: - // The beam's spot shape vaguely resembles an aspect-corrected f() in the - // range [0, 1] (not quite, but it's related). f(color) = color makes - // spots look like diamonds, and a spherical function or cube balances - // between variable width and a soft/realistic shape. A beam_spot_power - // > 1.0 can produce an ugly spot shape and more initial clipping, but the - // final shape also differs based on the horizontal resampling filter and - // the phosphor bloom. For instance, resampling horizontally in nonlinear - // light and/or with a sharp (e.g. Lanczos) filter will sharpen the spot - // shape, but a sixth root is still quite soft. A power function (default - // 1.0/3.0 beam_spot_power) is most flexible, but a fixed spherical curve - // has the highest variability without an awful spot shape. - // - // beam_min_sigma affects scanline sharpness/aliasing in dim areas, and its - // difference from beam_max_sigma affects beam width variability. It only - // affects clipping [for pure Gaussians] if beam_spot_power > 1.0 (which is - // a conservative estimate for a more complex constraint). - // - // beam_max_sigma affects clipping and increasing scanline width/softness - // as color increases. The wider this is, the more scanlines need to be - // evaluated to avoid distortion. 
For a pure Gaussian, the max_beam_sigma - // at which the first unused scanline always has a weight < 1.0/255.0 is: - // num scanlines = 2, max_beam_sigma = 0.2089; distortions begin ~0.34 - // num scanlines = 3, max_beam_sigma = 0.3879; distortions begin ~0.52 - // num scanlines = 4, max_beam_sigma = 0.5723; distortions begin ~0.70 - // num scanlines = 5, max_beam_sigma = 0.7591; distortions begin ~0.89 - // num scanlines = 6, max_beam_sigma = 0.9483; distortions begin ~1.08 - // Generalized Gaussians permit more leeway here as steepness increases. - if(beam_spot_shape_function < 0.5) - { - // Use a power function: - return beam_min_sigma.xxx + sigma_range * - pow(color, beam_spot_power); - } - else - { - // Use a spherical function: - const float3 color_minus_1 = color - 1.0.xxx; - return beam_min_sigma.xxx + sigma_range * - sqrt(1.0.xxx - color_minus_1*color_minus_1); - } -} - -float3 get_generalized_gaussian_beta(const float3 color, - const float shape_range) -{ - // Requires: Globals: - // 1.) beam_min_shape and beam_max_shape are global floats - // containing the desired min/max generalized Gaussian - // beta parameters, for dim and bright colors respectively. - // 2.) beam_max_shape must be >= 2.0 - // 3.) beam_min_shape must be in [2.0, beam_max_shape] - // 4.) beam_shape_power must be defined as a global float. - // Parameters: - // 1.) color is the underlying source color along a scanline - // 2.) shape_range = beam_max_shape - beam_min_shape; we take - // shape_range as a parameter to avoid repeated computation - // when beam_{min, max}_shape are runtime shader parameters - // Returns: The type-I generalized Gaussian "shape" parameter beta for - // the given color. - // Details/Discussion: - // Beta affects the scanline distribution as follows: - // a.) beta < 2.0 narrows the peak to a spike with a discontinuous slope - // b.) beta == 2.0 just degenerates to a Gaussian - // c.) 
beta > 2.0 flattens and widens the peak, then drops off more steeply - // than a Gaussian. Whereas high sigmas widen and soften peaks, high - // beta widen and sharpen peaks at the risk of aliasing. - // Unlike high beam_spot_powers, high beam_shape_powers actually soften shape - // transitions, whereas lower ones sharpen them (at the risk of aliasing). - return beam_min_shape + shape_range * pow(color, beam_shape_power); -} - -float3 scanline_gaussian_integral_contrib(const float3 dist, - const float3 color, const float pixel_height, const float sigma_range) -{ - // Requires: 1.) dist is the distance of the [potentially separate R/G/B] - // point(s) from a scanline in units of scanlines, where - // 1.0 means the sample point straddles the next scanline. - // 2.) color is the underlying source color along a scanline. - // 3.) pixel_height is the output pixel height in scanlines. - // 4.) Requirements of get_gaussian_sigma() must be met. - // Returns: Return a scanline's light output over a given pixel. - // Details: - // The CRT beam profile follows a roughly Gaussian distribution which is - // wider for bright colors than dark ones. The integral over the full - // range of a Gaussian function is always 1.0, so we can vary the beam - // with a standard deviation without affecting brightness. 'x' = distance: - // gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2)) - // gaussian integral = 0.5 (1.0 + erf(x/(sigma * sqrt(2)))) - // Use a numerical approximation of the "error function" (the Gaussian - // indefinite integral) to find the definite integral of the scanline's - // average brightness over a given pixel area. Even if curved coords were - // used in this pass, a flat scalar pixel height works almost as well as a - // pixel height computed from a full pixel-space to scanline-space matrix. 
- const float3 sigma = get_gaussian_sigma(color, sigma_range); - const float3 ph_offset = (pixel_height.xxx) * 0.5; - const float3 denom_inv = 1.0/(sigma*sqrt(2.0)); - const float3 integral_high = erf((dist + ph_offset)*denom_inv); - const float3 integral_low = erf((dist - ph_offset)*denom_inv); - return color * 0.5*(integral_high - integral_low)/pixel_height; -} - -float3 scanline_generalized_gaussian_integral_contrib(const float3 dist, - const float3 color, const float pixel_height, const float sigma_range, - const float shape_range) -{ - // Requires: 1.) Requirements of scanline_gaussian_integral_contrib() - // must be met. - // 2.) Requirements of get_gaussian_sigma() must be met. - // 3.) Requirements of get_generalized_gaussian_beta() must be - // met. - // Returns: Return a scanline's light output over a given pixel. - // A generalized Gaussian distribution allows the shape (beta) to vary - // as well as the width (alpha). "gamma" refers to the gamma function: - // generalized sample = - // beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta) - // ligamma(s, z) is the lower incomplete gamma function, for which we only - // implement two of four branches (because we keep 1/beta <= 0.5): - // generalized integral = 0.5 + 0.5* sign(x) * - // ligamma(1/beta, (|x|/alpha)**beta)/gamma(1/beta) - // See get_generalized_gaussian_beta() for a discussion of beta. - // We base alpha on the intended Gaussian sigma, but it only strictly - // models models standard deviation at beta == 2, because the standard - // deviation depends on both alpha and beta (keeping alpha independent is - // faster and preserves intuitive behavior and a full spectrum of results). 
- const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range); - const float3 beta = get_generalized_gaussian_beta(color, shape_range); - const float3 alpha_inv = 1.0.xxx/alpha; - const float3 s = 1.0.xxx/beta; - const float3 ph_offset = (pixel_height.xxx) * 0.5; - // Pass beta to gamma_impl to avoid repeated divides. Similarly pass - // beta (i.e. 1/s) and 1/gamma(s) to normalized_ligamma_impl. - const float3 gamma_s_inv = 1.0.xxx/gamma_impl(s, beta); - const float3 dist1 = dist + ph_offset; - const float3 dist0 = dist - ph_offset; - const float3 integral_high = sign(dist1) * normalized_ligamma_impl( - s, pow(abs(dist1)*alpha_inv, beta), beta, gamma_s_inv); - const float3 integral_low = sign(dist0) * normalized_ligamma_impl( - s, pow(abs(dist0)*alpha_inv, beta), beta, gamma_s_inv); - return color * 0.5*(integral_high - integral_low)/pixel_height; -} - -float3 scanline_gaussian_sampled_contrib(const float3 dist, const float3 color, - const float pixel_height, const float sigma_range) -{ - // See scanline_gaussian integral_contrib() for detailed comments! 
- // gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2)) - const float3 sigma = get_gaussian_sigma(color, sigma_range); - // Avoid repeated divides: - const float3 sigma_inv = 1.0.xxx/sigma; - const float3 inner_denom_inv = 0.5 * sigma_inv * sigma_inv; - const float3 outer_denom_inv = sigma_inv/sqrt(2.0*pi); - if(beam_antialias_level > 0.5) - { - // Sample 1/3 pixel away in each direction as well: - const float3 sample_offset = pixel_height.xxx/3.0; - const float3 dist2 = dist + sample_offset; - const float3 dist3 = abs(dist - sample_offset); - // Average three pure Gaussian samples: - const float3 scale = color/3.0 * outer_denom_inv; - const float3 weight1 = exp(-(dist*dist)*inner_denom_inv); - const float3 weight2 = exp(-(dist2*dist2)*inner_denom_inv); - const float3 weight3 = exp(-(dist3*dist3)*inner_denom_inv); - return scale * (weight1 + weight2 + weight3); - } - else - { - return color*exp(-(dist*dist)*inner_denom_inv)*outer_denom_inv; - } -} - -float3 scanline_generalized_gaussian_sampled_contrib(const float3 dist, - const float3 color, const float pixel_height, const float sigma_range, - const float shape_range) -{ - // See scanline_generalized_gaussian_integral_contrib() for details! - // generalized sample = - // beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta) - const float3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range); - const float3 beta = get_generalized_gaussian_beta(color, shape_range); - // Avoid repeated divides: - const float3 alpha_inv = 1.0.xxx/alpha; - const float3 beta_inv = 1.0.xxx/beta; - const float3 scale = color * beta * 0.5 * alpha_inv / - gamma_impl(beta_inv, beta); - if(beam_antialias_level > 0.5) - { - // Sample 1/3 pixel closer to and farther from the scanline too. 
- const float3 sample_offset = pixel_height.xxx/3.0; - const float3 dist2 = dist + sample_offset; - const float3 dist3 = abs(dist - sample_offset); - // Average three generalized Gaussian samples: - const float3 weight1 = exp(-pow(abs(dist*alpha_inv), beta)); - const float3 weight2 = exp(-pow(abs(dist2*alpha_inv), beta)); - const float3 weight3 = exp(-pow(abs(dist3*alpha_inv), beta)); - return scale/3.0 * (weight1 + weight2 + weight3); - } - else - { - return scale * exp(-pow(abs(dist*alpha_inv), beta)); - } -} - -float3 scanline_contrib(float3 dist, float3 color, - float pixel_height, const float sigma_range, const float shape_range) -{ - // Requires: 1.) Requirements of scanline_gaussian_integral_contrib() - // must be met. - // 2.) Requirements of get_gaussian_sigma() must be met. - // 3.) Requirements of get_generalized_gaussian_beta() must be - // met. - // Returns: Return a scanline's light output over a given pixel, using - // a generalized or pure Gaussian distribution and sampling or - // integrals as desired by user codepath choices. 
- if(beam_generalized_gaussian) - { - if(beam_antialias_level > 1.5) - { - return scanline_generalized_gaussian_integral_contrib( - dist, color, pixel_height, sigma_range, shape_range); - } - else - { - return scanline_generalized_gaussian_sampled_contrib( - dist, color, pixel_height, sigma_range, shape_range); - } - } - else - { - if(beam_antialias_level > 1.5) - { - return scanline_gaussian_integral_contrib( - dist, color, pixel_height, sigma_range); - } - else - { - return scanline_gaussian_sampled_contrib( - dist, color, pixel_height, sigma_range); - } - } -} - -float3 get_raw_interpolated_color(const float3 color0, - const float3 color1, const float3 color2, const float3 color3, - const float4 weights) -{ - // Use max to avoid bizarre artifacts from negative colors: - return max(mul(weights, float4x3(color0, color1, color2, color3)), 0.0); -} - -float3 get_interpolated_linear_color(const float3 color0, const float3 color1, - const float3 color2, const float3 color3, const float4 weights) -{ - // Requires: 1.) Requirements of include/gamma-management.h must be met: - // intermediate_gamma must be globally defined, and input - // colors are interpreted as linear RGB unless you #define - // GAMMA_ENCODE_EVERY_FBO (in which case they are - // interpreted as gamma-encoded with intermediate_gamma). - // 2.) color0-3 are colors sampled from a texture with tex2D(). - // They are interpreted as defined in requirement 1. - // 3.) weights contains weights for each color, summing to 1.0. - // 4.) beam_horiz_linear_rgb_weight must be defined as a global - // float in [0.0, 1.0] describing how much blending should - // be done in linear RGB (rest is gamma-corrected RGB). - // 5.) RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined - // if beam_horiz_linear_rgb_weight is anything other than a - // static constant, or we may try branching at runtime - // without dynamic branches allowed (slow). 
- // Returns: Return an interpolated color lookup between the four input - // colors based on the weights in weights. The final color will - // be a linear RGB value, but the blending will be done as - // indicated above. - const float intermediate_gamma = get_intermediate_gamma(); - // Branch if beam_horiz_linear_rgb_weight is static (for free) or if the - // profile allows dynamic branches (faster than computing extra pows): - #ifndef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE - #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - #else - #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES - #define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - #endif - #endif - #ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT - // beam_horiz_linear_rgb_weight is static, so we can branch: - #ifdef GAMMA_ENCODE_EVERY_FBO - const float3 gamma_mixed_color = pow(get_raw_interpolated_color( - color0, color1, color2, color3, weights), intermediate_gamma); - if(beam_horiz_linear_rgb_weight > 0.0) - { - const float3 linear_mixed_color = get_raw_interpolated_color( - pow(color0, intermediate_gamma), - pow(color1, intermediate_gamma), - pow(color2, intermediate_gamma), - pow(color3, intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, - beam_horiz_linear_rgb_weight); - } - else - { - return gamma_mixed_color; - } - #else - const float3 linear_mixed_color = get_raw_interpolated_color( - color0, color1, color2, color3, weights); - if(beam_horiz_linear_rgb_weight < 1.0) - { - const float3 gamma_mixed_color = get_raw_interpolated_color( - pow(color0, 1.0/intermediate_gamma), - pow(color1, 1.0/intermediate_gamma), - pow(color2, 1.0/intermediate_gamma), - pow(color3, 1.0/intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, - beam_horiz_linear_rgb_weight); - } - else - { - return linear_mixed_color; - } - #endif // GAMMA_ENCODE_EVERY_FBO - #else - #ifdef GAMMA_ENCODE_EVERY_FBO - // Inputs: color0-3 are colors in gamma-encoded RGB. 
- const float3 gamma_mixed_color = pow(get_raw_interpolated_color( - color0, color1, color2, color3, weights), intermediate_gamma); - const float3 linear_mixed_color = get_raw_interpolated_color( - pow(color0, intermediate_gamma), - pow(color1, intermediate_gamma), - pow(color2, intermediate_gamma), - pow(color3, intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, - beam_horiz_linear_rgb_weight); - #else - // Inputs: color0-3 are colors in linear RGB. - const float3 linear_mixed_color = get_raw_interpolated_color( - color0, color1, color2, color3, weights); - const float3 gamma_mixed_color = get_raw_interpolated_color( - pow(color0, 1.0/intermediate_gamma), - pow(color1, 1.0/intermediate_gamma), - pow(color2, 1.0/intermediate_gamma), - pow(color3, 1.0/intermediate_gamma), - weights); - return lerp(gamma_mixed_color, linear_mixed_color, - beam_horiz_linear_rgb_weight); - #endif // GAMMA_ENCODE_EVERY_FBO - #endif // SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT -} - -float3 get_scanline_color(const sampler2D Source, const float2 scanline_uv, - const float2 uv_step_x, const float4 weights) -{ - // Requires: 1.) scanline_uv must be vertically snapped to the caller's - // desired line or scanline and horizontally snapped to the - // texel just left of the output pixel (color1) - // 2.) uv_step_x must contain the horizontal uv distance - // between texels. - // 3.) weights must contain interpolation filter weights for - // color0, color1, color2, and color3, where color1 is just - // left of the output pixel. - // Returns: Return a horizontally interpolated texture lookup using 2-4 - // nearby texels, according to weights and the conventions of - // get_interpolated_linear_color(). - // We can ignore the outside texture lookups for Quilez resampling. 
- const float3 color1 = tex2D(Source, scanline_uv).rgb; - const float3 color2 = tex2D(Source, scanline_uv + uv_step_x).rgb; - float3 color0 = 0.0.xxx; - float3 color3 = 0.0.xxx; - if(beam_horiz_filter > 0.5) - { - color0 = tex2D(Source, scanline_uv - uv_step_x).rgb; - color3 = tex2D(Source, scanline_uv + 2.0 * uv_step_x).rgb; - } - // Sample the texture as-is, whether it's linear or gamma-encoded: - // get_interpolated_linear_color() will handle the difference. - return get_interpolated_linear_color(color0, color1, color2, color3, weights); -} - -float3 sample_single_scanline_horizontal(const sampler2D Source, - const float2 tex_uv, const float2 texture_size, - const float2 texture_size_inv) -{ - // TODO: Add function requirements. - // Snap to the previous texel and get sample dists from 2/4 nearby texels: - const float2 curr_texel = tex_uv * texture_size; - // Use under_half to fix a rounding bug right around exact texel locations. - const float2 prev_texel = - floor(curr_texel - under_half.xx) + 0.5.xx; - const float2 prev_texel_hor = float2(prev_texel.x, curr_texel.y); - const float2 prev_texel_hor_uv = prev_texel_hor * texture_size_inv; - const float prev_dist = curr_texel.x - prev_texel_hor.x; - const float4 sample_dists = float4(1.0 + prev_dist, prev_dist, - 1.0 - prev_dist, 2.0 - prev_dist); - // Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels: - float4 weights; - if(beam_horiz_filter < 0.5) - { - // Quilez: - const float x = sample_dists.y; - const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0); - weights = float4(0.0, 1.0 - w2, w2, 0.0); - } - else if(beam_horiz_filter < 1.5) - { - // Gaussian: - float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma); - weights = exp(-(sample_dists*sample_dists)*inner_denom_inv); - } - else - { - // Lanczos2: - const float4 pi_dists = FIX_ZERO(sample_dists * pi); - weights = 2.0 * sin(pi_dists) * sin(pi_dists * 0.5) / - (pi_dists * pi_dists); - } - // Ensure the weight sum == 1.0: - const 
float4 final_weights = weights/dot(weights, 1.0.xxxx); - // Get the interpolated horizontal scanline color: - const float2 uv_step_x = float2(texture_size_inv.x, 0.0); - return get_scanline_color( - Source, prev_texel_hor_uv, uv_step_x, final_weights); -} - -float3 sample_rgb_scanline_horizontal(const sampler2D Source, - const float2 tex_uv, const float2 texture_size, - const float2 texture_size_inv) -{ - // TODO: Add function requirements. - // Rely on a helper to make convergence easier. - if(beam_misconvergence) - { - const float3 convergence_offsets_rgb = - get_convergence_offsets_x_vector(); - const float3 offset_u_rgb = - convergence_offsets_rgb * texture_size_inv.xxx; - const float2 scanline_uv_r = tex_uv - float2(offset_u_rgb.r, 0.0); - const float2 scanline_uv_g = tex_uv - float2(offset_u_rgb.g, 0.0); - const float2 scanline_uv_b = tex_uv - float2(offset_u_rgb.b, 0.0); - const float3 sample_r = sample_single_scanline_horizontal( - Source, scanline_uv_r, texture_size, texture_size_inv); - const float3 sample_g = sample_single_scanline_horizontal( - Source, scanline_uv_g, texture_size, texture_size_inv); - const float3 sample_b = sample_single_scanline_horizontal( - Source, scanline_uv_b, texture_size, texture_size_inv); - return float3(sample_r.r, sample_g.g, sample_b.b); - } - else - { - return sample_single_scanline_horizontal(Source, tex_uv, texture_size, - texture_size_inv); - } -} - -float2 get_last_scanline_uv(const float2 tex_uv, const float2 texture_size, - const float2 texture_size_inv, const float2 il_step_multiple, - const float frame_count, out float dist) -{ - // Compute texture coords for the last/upper scanline, accounting for - // interlacing: With interlacing, only consider even/odd scanlines every - // other frame. Top-field first (TFF) order puts even scanlines on even - // frames, and BFF order puts them on odd frames. 
Texels are centered at: - // frac(tex_uv * texture_size) == x.5 - // Caution: If these coordinates ever seem incorrect, first make sure it's - // not because anisotropic filtering is blurring across field boundaries. - // Note: TFF/BFF won't matter for sources that double-weave or similar. - const float field_offset = floor(il_step_multiple.y * 0.75) * - fmod(frame_count + float(interlace_bff), 2.0); - const float2 curr_texel = tex_uv * texture_size; - // Use under_half to fix a rounding bug right around exact texel locations. - // This causes an insane bug on duckstation, so it's disabled here. (Hyllian, 2024) -// const float2 prev_texel_num = floor(curr_texel - under_half.xx); - const float2 prev_texel_num = curr_texel; - const float wrong_field = fmod( - prev_texel_num.y + field_offset, il_step_multiple.y); - const float2 scanline_texel_num = prev_texel_num - float2(0.0, wrong_field); - // Snap to the center of the previous scanline in the current field: - const float2 scanline_texel = scanline_texel_num + 0.5.xx; - const float2 scanline_uv = scanline_texel * texture_size_inv; - // Save the sample's distance from the scanline, in units of scanlines: - dist = (curr_texel.y - scanline_texel.y)/il_step_multiple.y; - return scanline_uv; -} - -bool is_interlaced(float num_lines) -{ - // Detect interlacing based on the number of lines in the source. - if(interlace_detect) - { - // NTSC: 525 lines, 262.5/field; 486 active (2 half-lines), 243/field - // NTSC Emulators: Typically 224 or 240 lines - // PAL: 625 lines, 312.5/field; 576 active (typical), 288/field - // PAL Emulators: ? - // ATSC: 720p, 1080i, 1080p - // Where do we place our cutoffs? Assumptions: - // 1.) We only need to care about active lines. - // 2.) Anything > 288 and <= 576 lines is probably interlaced. - // 3.) Anything > 576 lines is probably not interlaced... - // 4.) ...except 1080 lines, which is a crapshoot (user decision). - // 5.) 
Just in case the main program uses calculated video sizes, - // we should nudge the float thresholds a bit. - const bool sd_interlace = ((num_lines > 288.5) && (num_lines < 576.5)); - const bool hd_interlace = interlace_1080i ? - ((num_lines > 1079.5) && (num_lines < 1080.5)) : - false; - return (sd_interlace || hd_interlace); - } - else - { - return false; - } -} - - -#endif // SCANLINE_FUNCTIONS_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/special-functions.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/special-functions.fxh deleted file mode 100644 index 6f425c8a1..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/special-functions.fxh +++ /dev/null @@ -1,498 +0,0 @@ -#ifndef SPECIAL_FUNCTIONS_H -#define SPECIAL_FUNCTIONS_H - -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -///////////////////////////////// DESCRIPTION //////////////////////////////// - -// This file implements the following mathematical special functions: -// 1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2)) -// 2.) gamma(s), a real-numbered extension of the integer factorial function -// It also implements normalized_ligamma(s, z), a normalized lower incomplete -// gamma function for s < 0.5 only. Both gamma() and normalized_ligamma() can -// be called with an _impl suffix to use an implementation version with a few -// extra precomputed parameters (which may be useful for the caller to reuse). -// See below for details. -// -// Design Rationale: -// Pretty much every line of code in this file is duplicated four times for -// different input types (float4/float3/float2/float). This is unfortunate, -// but Cg doesn't allow function templates. Macros would be far less verbose, -// but they would make the code harder to document and read. I don't expect -// these functions will require a whole lot of maintenance changes unless -// someone ever has need for more robust incomplete gamma functions, so code -// duplication seems to be the lesser evil in this case. - - -/////////////////////////// GAUSSIAN ERROR FUNCTION ////////////////////////// - -float4 erf6(float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Return an Abramowitz/Stegun approximation of erf(), where: - // erf(x) = 2/sqrt(pi) * integral(e**(-x**2)) - // This approximation has a max absolute error of 2.5*10**-5 - // with solid numerical robustness and efficiency. 
See: - // https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions - static const float4 one = 1.0.xxxx; - const float4 sign_x = sign(x); - const float4 t = one/(one + 0.47047*abs(x)); - const float4 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float3 erf6(const float3 x) -{ - // Float3 version: - static const float3 one = 1.0.xxx; - const float3 sign_x = sign(x); - const float3 t = one/(one + 0.47047*abs(x)); - const float3 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float2 erf6(const float2 x) -{ - // Float2 version: - static const float2 one = 1.0.xx; - const float2 sign_x = sign(x); - const float2 t = one/(one + 0.47047*abs(x)); - const float2 result = one - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float erf6(const float x) -{ - // Float version: - const float sign_x = sign(x); - const float t = 1.0/(1.0 + 0.47047*abs(x)); - const float result = 1.0 - t*(0.3480242 + t*(-0.0958798 + t*0.7478556))* - exp(-(x*x)); - return result * sign_x; -} - -float4 erft(const float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Approximate erf() with the hyperbolic tangent. The error is - // visually noticeable, but it's blazing fast and perceptually - // close...at least on ATI hardware. See: - // http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html - // Warning: Only use this if your hardware drivers correctly implement - // tanh(): My nVidia 8800GTS returns garbage output. 
- return tanh(1.202760580 * x); -} - -float3 erft(const float3 x) -{ - // Float3 version: - return tanh(1.202760580 * x); -} - -float2 erft(const float2 x) -{ - // Float2 version: - return tanh(1.202760580 * x); -} - -float erft(const float x) -{ - // Float version: - return tanh(1.202760580 * x); -} - -float4 erf(const float4 x) -{ - // Requires: x is the standard parameter to erf(). - // Returns: Some approximation of erf(x), depending on user settings. - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float3 erf(const float3 x) -{ - // Float3 version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float2 erf(const float2 x) -{ - // Float2 version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - -float erf(const float x) -{ - // Float version: - #ifdef ERF_FAST_APPROXIMATION - return erft(x); - #else - return erf6(x); - #endif -} - - -/////////////////////////// COMPLETE GAMMA FUNCTION ////////////////////////// - -float4 gamma_impl(const float4 s, const float4 s_inv) -{ - // Requires: 1.) s is the standard parameter to the gamma function, and - // it should lie in the [0, 36] range. - // 2.) s_inv = 1.0/s. This implementation function requires - // the caller to precompute this value, giving users the - // opportunity to reuse it. - // Returns: Return approximate gamma function (real-numbered factorial) - // output using the Lanczos approximation with two coefficients - // calculated using Paul Godfrey's method here: - // http://my.fit.edu/~gabdo/gamma.txt - // An optimal g value for s in [0, 36] is ~1.12906830989, with - // a maximum relative error of 0.000463 for 2**16 equally - // evals. We could use three coeffs (0.0000346 error) without - // hurting latency, but this allows more parallelism with - // outside instructions. 
- static const float4 g = 1.12906830989.xxxx; - static const float4 c0 = 0.8109119309638332633713423362694399653724431.xxxx; - static const float4 c1 = 0.4808354605142681877121661197951496120000040.xxxx; - static const float4 e = 2.71828182845904523536028747135266249775724709.xxxx; - const float4 sph = s + 0.5.xxxx; - const float4 lanczos_sum = c0 + c1/(s + 1.0.xxxx); - const float4 base = (sph + g)/e; // or (s + g + float4(0.5))/e - // gamma(s + 1) = base**sph * lanczos_sum; divide by s for gamma(s). - // This has less error for small s's than (s -= 1.0) at the beginning. - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float3 gamma_impl(const float3 s, const float3 s_inv) -{ - // Float3 version: - static const float3 g = 1.12906830989.xxx; - static const float3 c0 = 0.8109119309638332633713423362694399653724431.xxx; - static const float3 c1 = 0.4808354605142681877121661197951496120000040.xxx; - static const float3 e = 2.71828182845904523536028747135266249775724709.xxx; - const float3 sph = s + 0.5.xxx; - const float3 lanczos_sum = c0 + c1/(s + 1.0.xxx); - const float3 base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float2 gamma_impl(const float2 s, const float2 s_inv) -{ - // Float2 version: - static const float2 g = 1.12906830989.xx; - static const float2 c0 = 0.8109119309638332633713423362694399653724431.xx; - static const float2 c1 = 0.4808354605142681877121661197951496120000040.xx; - static const float2 e = 2.71828182845904523536028747135266249775724709.xx; - const float2 sph = s + 0.5.xx; - const float2 lanczos_sum = c0 + c1/(s + 1.0.xx); - const float2 base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float gamma_impl(const float s, const float s_inv) -{ - // Float version: - static const float g = 1.12906830989; - static const float c0 = 0.8109119309638332633713423362694399653724431; - static const float c1 = 0.4808354605142681877121661197951496120000040; - static const float e = 
2.71828182845904523536028747135266249775724709; - const float sph = s + 0.5; - const float lanczos_sum = c0 + c1/(s + 1.0); - const float base = (sph + g)/e; - return (pow(base, sph) * lanczos_sum) * s_inv; -} - -float4 gamma(const float4 s) -{ - // Requires: s is the standard parameter to the gamma function, and it - // should lie in the [0, 36] range. - // Returns: Return approximate gamma function output with a maximum - // relative error of 0.000463. See gamma_impl for details. - return gamma_impl(s, 1.0.xxxx/s); -} - -float3 gamma(const float3 s) -{ - // Float3 version: - return gamma_impl(s, 1.0.xxx/s); -} - -float2 gamma(const float2 s) -{ - // Float2 version: - return gamma_impl(s, 1.0.xx/s); -} - -float gamma(const float s) -{ - // Float version: - return gamma_impl(s, 1.0/s); -} - - -//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) /////////////// - -// Lower incomplete gamma function for small s and z (implementation): -float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv) -{ - // Requires: 1.) s < ~0.5 - // 2.) z <= ~0.775075 - // 3.) s_inv = 1.0/s (precomputed for outside reuse) - // Returns: A series representation for the lower incomplete gamma - // function for small s and small z (4 terms). 
- // The actual "rolled up" summation looks like: - // last_sign = 1.0; last_pow = 1.0; last_factorial = 1.0; - // sum = last_sign * last_pow / ((s + k) * last_factorial) - // for(int i = 0; i < 4; ++i) - // { - // last_sign *= -1.0; last_pow *= z; last_factorial *= i; - // sum += last_sign * last_pow / ((s + k) * last_factorial); - // } - // Unrolled, constant-unfolded and arranged for madds and parallelism: - const float4 scale = pow(z, s); - float4 sum = s_inv; // Summation iteration 0 result - // Summation iterations 1, 2, and 3: - const float4 z_sq = z*z; - const float4 denom1 = s + 1.0.xxxx; - const float4 denom2 = 2.0*s + 4.0.xxxx; - const float4 denom3 = 6.0*s + 18.0.xxxx; - //float4 denom4 = 24.0*s + float4(96.0); - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - //sum += z_sq * z_sq / denom4; - // Scale and return: - return scale * sum; -} - -float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv) -{ - // Float3 version: - const float3 scale = pow(z, s); - float3 sum = s_inv; - const float3 z_sq = z*z; - const float3 denom1 = s + 1.0.xxx; - const float3 denom2 = 2.0*s + 4.0.xxx; - const float3 denom3 = 6.0*s + 18.0.xxx; - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - return scale * sum; -} - -float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv) -{ - // Float2 version: - const float2 scale = pow(z, s); - float2 sum = s_inv; - const float2 z_sq = z*z; - const float2 denom1 = s + 1.0.xx; - const float2 denom2 = 2.0*s + 4.0.xx; - const float2 denom3 = 6.0*s + 18.0.xx; - sum -= z/denom1; - sum += z_sq/denom2; - sum -= z * z_sq/denom3; - return scale * sum; -} - -float ligamma_small_z_impl(const float s, const float z, const float s_inv) -{ - // Float version: - const float scale = pow(z, s); - float sum = s_inv; - const float z_sq = z*z; - const float denom1 = s + 1.0; - const float denom2 = 2.0*s + 4.0; - const float denom3 = 6.0*s + 18.0; - sum -= z/denom1; - sum += 
z_sq/denom2; - sum -= z * z_sq/denom3; - return scale * sum; -} - -// Upper incomplete gamma function for small s and large z (implementation): -float4 uigamma_large_z_impl(const float4 s, const float4 z) -{ - // Requires: 1.) s < ~0.5 - // 2.) z > ~0.775075 - // Returns: Gauss's continued fraction representation for the upper - // incomplete gamma function (4 terms). - // The "rolled up" continued fraction looks like this. The denominator - // is truncated, and it's calculated "from the bottom up:" - // denom = float4('inf'); - // float4 one = float4(1.0); - // for(int i = 4; i > 0; --i) - // { - // denom = ((i * 2.0) - one) + z - s + (i * (s - i))/denom; - // } - // Unrolled and constant-unfolded for madds and parallelism: - const float4 numerator = pow(z, s) * exp(-z); - float4 denom = 7.0.xxxx + z - s; - denom = 5.0.xxxx + z - s + (3.0*s - 9.0.xxxx)/denom; - denom = 3.0.xxxx + z - s + (2.0*s - 4.0.xxxx)/denom; - denom = 1.0.xxxx + z - s + (s - 1.0.xxxx)/denom; - return numerator / denom; -} - -float3 uigamma_large_z_impl(const float3 s, const float3 z) -{ - // Float3 version: - const float3 numerator = pow(z, s) * exp(-z); - float3 denom = 7.0.xxx + z - s; - denom = 5.0.xxx + z - s + (3.0*s - 9.0.xxx)/denom; - denom = 3.0.xxx + z - s + (2.0*s - 4.0.xxx)/denom; - denom = 1.0.xxx + z - s + (s - 1.0.xxx)/denom; - return numerator / denom; -} - -float2 uigamma_large_z_impl(const float2 s, const float2 z) -{ - // Float2 version: - const float2 numerator = pow(z, s) * exp(-z); - float2 denom = 7.0.xx + z - s; - denom = 5.0.xx + z - s + (3.0*s - 9.0.xx)/denom; - denom = 3.0.xx + z - s + (2.0*s - 4.0.xx)/denom; - denom = 1.0.xx + z - s + (s - 1.0.xx)/denom; - return numerator / denom; -} - -float uigamma_large_z_impl(const float s, const float z) -{ - // Float version: - const float numerator = pow(z, s) * exp(-z); - float denom = 7.0 + z - s; - denom = 5.0 + z - s + (3.0*s - 9.0)/denom; - denom = 3.0 + z - s + (2.0*s - 4.0)/denom; - denom = 1.0 + z - s + (s - 
1.0)/denom; - return numerator / denom; -} - -// Normalized lower incomplete gamma function for small s (implementation): -float4 normalized_ligamma_impl(const float4 s, const float4 z, - const float4 s_inv, const float4 gamma_s_inv) -{ - // Requires: 1.) s < ~0.5 - // 2.) s_inv = 1/s (precomputed for outside reuse) - // 3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse) - // Returns: Approximate the normalized lower incomplete gamma function - // for s < 0.5. Since we only care about s < 0.5, we only need - // to evaluate two branches (not four) based on z. Each branch - // uses four terms, with a max relative error of ~0.00182. The - // branch threshold and specifics were adapted for fewer terms - // from Gil/Segura/Temme's paper here: - // http://oai.cwi.nl/oai/asset/20433/20433B.pdf - // Evaluate both branches: Real branches test slower even when available. - static const float4 thresh = 0.775075.xxxx; - const bool4 z_is_large = z > thresh; - const float4 large_z = 1.0.xxxx - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - // Combine the results from both branches: - return large_z * float4(z_is_large.xxxx) + small_z * float4(!z_is_large.xxxx); -} - -float3 normalized_ligamma_impl(const float3 s, const float3 z, - const float3 s_inv, const float3 gamma_s_inv) -{ - // Float3 version: - static const float3 thresh = 0.775075.xxx; - const bool3 z_is_large = z > thresh; - const float3 large_z = 1.0.xxx - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - return large_z * float3(z_is_large.xxx) + small_z * float3(!z_is_large.xxx); -} - -float2 normalized_ligamma_impl(const float2 s, const float2 z, - const float2 s_inv, const float2 gamma_s_inv) -{ - // Float2 version: - static const float2 thresh = 0.775075.xx; - const bool2 z_is_large = z > thresh; - const float2 large_z = 1.0.xx - uigamma_large_z_impl(s, z) * 
gamma_s_inv; - const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - return large_z * float2(z_is_large.xx) + small_z * float2(!z_is_large.xx); -} - -float normalized_ligamma_impl(const float s, const float z, - const float s_inv, const float gamma_s_inv) -{ - // Float version: - static const float thresh = 0.775075; - const bool z_is_large = z > thresh; - const float large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv; - const float small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv; - return large_z * float(z_is_large) + small_z * float(!z_is_large); -} - -// Normalized lower incomplete gamma function for small s: -float4 normalized_ligamma(const float4 s, const float4 z) -{ - // Requires: s < ~0.5 - // Returns: Approximate the normalized lower incomplete gamma function - // for s < 0.5. See normalized_ligamma_impl() for details. - const float4 s_inv = 1.0.xxxx/s; - const float4 gamma_s_inv = 1.0.xxxx/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float3 normalized_ligamma(const float3 s, const float3 z) -{ - // Float3 version: - const float3 s_inv = 1.0.xxx/s; - const float3 gamma_s_inv = 1.0.xxx/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float2 normalized_ligamma(const float2 s, const float2 z) -{ - // Float2 version: - const float2 s_inv = 1.0.xx/s; - const float2 gamma_s_inv = 1.0.xx/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - -float normalized_ligamma(const float s, const float z) -{ - // Float version: - const float s_inv = 1.0/s; - const float gamma_s_inv = 1.0/gamma_impl(s, s_inv); - return normalized_ligamma_impl(s, z, s_inv, gamma_s_inv); -} - - -#endif // SPECIAL_FUNCTIONS_H - - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-cgp-constants.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-cgp-constants.fxh deleted file mode 100644 index 
9e750d0c5..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-cgp-constants.fxh +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef USER_CGP_CONSTANTS_H -#define USER_CGP_CONSTANTS_H - -// IMPORTANT: -// These constants MUST be set appropriately for the settings in crt-royale.cgp -// (or whatever related .cgp file you're using). If they aren't, you're likely -// to get artifacts, the wrong phosphor mask size, etc. I wish these could be -// set directly in the .cgp file to make things easier, but...they can't. - -// PASS SCALES AND RELATED CONSTANTS: -// Copy the absolute scale_x for BLOOM_APPROX. There are two major versions of -// this shader: One does a viewport-scale bloom, and the other skips it. The -// latter benefits from a higher bloom_approx_scale_x, so save both separately: -static const float bloom_approx_size_x = 320.0; -static const float bloom_approx_size_x_for_fake = 400.0; -// Copy the viewport-relative scales of the phosphor mask resize passes -// (MASK_RESIZE and the pass immediately preceding it): -static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625); -// Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.: -static const float geom_max_aspect_ratio = 4.0/3.0; - -// PHOSPHOR MASK TEXTURE CONSTANTS: -// Set the following constants to reflect the properties of the phosphor mask -// texture named in crt-royale.cgp. The shader optionally resizes a mask tile -// based on user settings, then repeats a single tile until filling the screen. -// The shader must know the input texture size (default 64x64), and to manually -// resize, it must also know the horizontal triads per tile (default 8). -static const float2 mask_texture_small_size = 64.0.xx; -static const float2 mask_texture_large_size = 512.0.xx; -static const float mask_triads_per_tile = 8.0; -// We need the average brightness of the phosphor mask to compensate for the -// dimming it causes. 
The following four values are roughly correct for the -// masks included with the shader. Update the value for any LUT texture you -// change. [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether -// the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15). -//#define PHOSPHOR_MASK_GRILLE14 -static const float mask_grille14_avg_color = 50.6666666/255.0; - // TileableLinearApertureGrille14Wide7d33Spacing*.png - // TileableLinearApertureGrille14Wide10And6Spacing*.png -static const float mask_grille15_avg_color = 53.0/255.0; - // TileableLinearApertureGrille15Wide6d33Spacing*.png - // TileableLinearApertureGrille15Wide8And5d5Spacing*.png -static const float mask_slot_avg_color = 46.0/255.0; - // TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png - // TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png -static const float mask_shadow_avg_color = 41.0/255.0; - // TileableLinearShadowMask*.png - // TileableLinearShadowMaskEDP*.png - -#ifdef PHOSPHOR_MASK_GRILLE14 - static const float mask_grille_avg_color = mask_grille14_avg_color; -#else - static const float mask_grille_avg_color = mask_grille15_avg_color; -#endif - - -#endif // USER_CGP_CONSTANTS_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-settings.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-settings.fxh deleted file mode 100644 index e43cee77a..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/include/user-settings.fxh +++ /dev/null @@ -1,359 +0,0 @@ -#ifndef USER_SETTINGS_H -#define USER_SETTINGS_H - -///////////////////////////// DRIVER CAPABILITIES //////////////////////////// - -// The Cg compiler uses different "profiles" with different capabilities. -// This shader requires a Cg compilation profile >= arbfp1, but a few options -// require higher profiles like fp30 or fp40. 
The shader can't detect profile -// or driver capabilities, so instead you must comment or uncomment the lines -// below with "//" before "#define." Disable an option if you get compilation -// errors resembling those listed. Generally speaking, all of these options -// will run on nVidia cards, but only DRIVERS_ALLOW_TEX2DBIAS (if that) is -// likely to run on ATI/AMD, due to the Cg compiler's profile limitations. - -// Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1. -// Among other things, derivatives help us fix anisotropic filtering artifacts -// with curved manually tiled phosphor mask coords. Related errors: -// error C3004: function "float2 ddx(float2);" not supported in this profile -// error C3004: function "float2 ddy(float2);" not supported in this profile - //#define DRIVERS_ALLOW_DERIVATIVES - -// Fine derivatives: Unsupported on older ATI cards. -// Fine derivatives enable 2x2 fragment block communication, letting us perform -// fast single-pass blur operations. If your card uses coarse derivatives and -// these are enabled, blurs could look broken. Derivatives are a prerequisite. - #ifdef DRIVERS_ALLOW_DERIVATIVES - #define DRIVERS_ALLOW_FINE_DERIVATIVES - #endif - -// Dynamic looping: Requires an fp30 or newer profile. -// This makes phosphor mask resampling faster in some cases. Related errors: -// error C5013: profile does not support "for" statements and "for" could not -// be unrolled - //#define DRIVERS_ALLOW_DYNAMIC_BRANCHES - -// Without DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops. -// Using one static loop avoids overhead if the user is right, but if the user -// is wrong (loops are allowed), breaking a loop into if-blocked pieces with a -// binary search can potentially save some iterations. 
However, it may fail: -// error C6001: Temporary register limit of 32 exceeded; 35 registers -// needed to compile program - //#define ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS - -// tex2Dlod: Requires an fp40 or newer profile. This can be used to disable -// anisotropic filtering, thereby fixing related artifacts. Related errors: -// error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in -// this profile - //#define DRIVERS_ALLOW_TEX2DLOD - -// tex2Dbias: Requires an fp30 or newer profile. This can be used to alleviate -// artifacts from anisotropic filtering and mipmapping. Related errors: -// error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported -// in this profile - //#define DRIVERS_ALLOW_TEX2DBIAS - -// Integrated graphics compatibility: Integrated graphics like Intel HD 4000 -// impose stricter limitations on register counts and instructions. Enable -// INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or: -// error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed -// to compile program. -// Enabling integrated graphics compatibility mode will automatically disable: -// 1.) PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer. -// (This may be reenabled in a later release.) -// 2.) RUNTIME_GEOMETRY_MODE -// 3.) The high-quality 4x4 Gaussian resize for the bloom approximation - //#define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE - - -//////////////////////////// USER CODEPATH OPTIONS /////////////////////////// - -// To disable a #define option, turn its line into a comment with "//." - -// RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications): -// Enable runtime shader parameters in the Retroarch (etc.) GUI? They override -// many of the options in this file and allow real-time tuning, but many of -// them are slower. Disabling them and using this text file will boost FPS. -#define RUNTIME_SHADER_PARAMS_ENABLE -// Specify the phosphor bloom sigma at runtime? 
This option is 10% slower, but -// it's the only way to do a wide-enough full bloom with a runtime dot pitch. -#define RUNTIME_PHOSPHOR_BLOOM_SIGMA -// Specify antialiasing weight parameters at runtime? (Costs ~20% with cubics) -#define RUNTIME_ANTIALIAS_WEIGHTS -// Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!) -//#define RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS -// Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader -// parameters? This will require more math or dynamic branching. -#define RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE -// Specify the tilt at runtime? This makes things about 3% slower. -#define RUNTIME_GEOMETRY_TILT -// Specify the geometry mode at runtime? -#define RUNTIME_GEOMETRY_MODE -// Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and -// mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without -// dynamic branches? This is cheap if mask_resize_viewport_scale is small. -#define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT - -// PHOSPHOR MASK: -// Manually resize the phosphor mask for best results (slower)? Disabling this -// removes the option to do so, but it may be faster without dynamic branches. - #define PHOSPHOR_MASK_MANUALLY_RESIZE -// If we sinc-resize the mask, should we Lanczos-window it (slower but better)? - #define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW -// Larger blurs are expensive, but we need them to blur larger triads. 
We can -// detect the right blur if the triad size is static or our profile allows -// dynamic branches, but otherwise we use the largest blur the user indicates -// they might need: - #define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS - //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS - //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS - //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS - // Here's a helpful chart: - // MaxTriadSize BlurSize MinTriadCountsByResolution - // 3.0 9.0 480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - // 6.0 17.0 240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - // 9.0 25.0 160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - // 12.0 31.0 120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - // 18.0 43.0 80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect - - -/////////////////////////////// USER PARAMETERS ////////////////////////////// - -// Note: Many of these static parameters are overridden by runtime shader -// parameters when those are enabled. However, many others are static codepath -// options that were cleaner or more convert to code as static constants. - -// GAMMA: - static const float crt_gamma_static = 2.5; // range [1, 5] - static const float lcd_gamma_static = 2.2; // range [1, 5] - -// LEVELS MANAGEMENT: - // Control the final multiplicative image contrast: - static const float levels_contrast_static = 1.0; // range [0, 4) - // We auto-dim to avoid clipping between passes and restore brightness - // later. Control the dim factor here: Lower values clip less but crush - // blacks more (static only for now). - static const float levels_autodim_temp = 0.5; // range (0, 1] - -// HALATION/DIFFUSION/BLOOM: - // Halation weight: How much energy should be lost to electrons bounding - // around under the CRT glass and exciting random phosphors? 
- static const float halation_weight_static = 0.0; // range [0, 1] - // Refractive diffusion weight: How much light should spread/diffuse from - // refracting through the CRT glass? - static const float diffusion_weight_static = 0.075; // range [0, 1] - // Underestimate brightness: Bright areas bloom more, but we can base the - // bloom brightpass on a lower brightness to sharpen phosphors, or a higher - // brightness to soften them. Low values clip, but >= 0.8 looks okay. - static const float bloom_underestimate_levels_static = 0.8; // range [0, 5] - // Blur all colors more than necessary for a softer phosphor bloom? - static const float bloom_excess_static = 0.0; // range [0, 1] - // The BLOOM_APPROX pass approximates a phosphor blur early on with a small - // blurred resize of the input (convergence offsets are applied as well). - // There are three filter options (static option only for now): - // 0.) Bilinear resize: A fast, close approximation to a 4x4 resize - // if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane - // and beam_max_sigma is low. - // 1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring, - // always uses a static sigma regardless of beam_max_sigma or - // mask_num_triads_desired. - // 2.) True 4x4 Gaussian resize: Slowest, technically correct. - // These options are more pronounced for the fast, unbloomed shader version. - static const float bloom_approx_filter_static = 2.0; - -// ELECTRON BEAM SCANLINE DISTRIBUTION: - // How many scanlines should contribute light to each pixel? Using more - // scanlines is slower (especially for a generalized Gaussian) but less - // distorted with larger beam sigmas (especially for a pure Gaussian). 
The - // max_beam_sigma at which the closest unused weight is guaranteed < - // 1.0/255.0 (for a 3x antialiased pure Gaussian) is: - // 2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized - // 3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized - // 4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized - // 5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized - // 6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized - static const float beam_num_scanlines = 3.0; // range [2, 6] - // A generalized Gaussian beam varies shape with color too, now just width. - // It's slower but more flexible (static option only for now). - static const bool beam_generalized_gaussian = true; - // What kind of scanline antialiasing do you want? - // 0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral - // Integrals are slow (especially for generalized Gaussians) and rarely any - // better than 3x antialiasing (static option only for now). - static const float beam_antialias_level = 1.0; // range [0, 2] - // Min/max standard deviations for scanline beams: Higher values widen and - // soften scanlines. Depending on other options, low min sigmas can alias. - static const float beam_min_sigma_static = 0.02; // range (0, 1] - static const float beam_max_sigma_static = 0.3; // range (0, 1] - // Beam width varies as a function of color: A power function (0) is more - // configurable, but a spherical function (1) gives the widest beam - // variability without aliasing (static option only for now). - static const float beam_spot_shape_function = 0.0; - // Spot shape power: Powers <= 1 give smoother spot shapes but lower - // sharpness. Powers >= 1.0 are awful unless mix/max sigmas are close. 
- static const float beam_spot_power_static = 1.0/3.0; // range (0, 16] - // Generalized Gaussian max shape parameters: Higher values give flatter - // scanline plateaus and steeper dropoffs, simultaneously widening and - // sharpening scanlines at the cost of aliasing. 2.0 is pure Gaussian, and - // values > ~40.0 cause artifacts with integrals. - static const float beam_min_shape_static = 2.0; // range [2, 32] - static const float beam_max_shape_static = 4.0; // range [2, 32] - // Generalized Gaussian shape power: Affects how quickly the distribution - // changes shape from Gaussian to steep/plateaued as color increases from 0 - // to 1.0. Higher powers appear softer for most colors, and lower powers - // appear sharper for most colors. - static const float beam_shape_power_static = 1.0/4.0; // range (0, 16] - // What filter should be used to sample scanlines horizontally? - // 0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp) - static const float beam_horiz_filter_static = 0.0; - // Standard deviation for horizontal Gaussian resampling: - static const float beam_horiz_sigma_static = 0.35; // range (0, 2/3] - // Do horizontal scanline sampling in linear RGB (correct light mixing), - // gamma-encoded RGB (darker, hard spot shape, may better match bandwidth- - // limiting circuitry in some CRT's), or a weighted avg.? - static const float beam_horiz_linear_rgb_weight_static = 1.0; // range [0, 1] - // Simulate scanline misconvergence? This needs 3x horizontal texture - // samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in - // later passes (static option only for now). - static const bool beam_misconvergence = true; - // Convergence offsets in x/y directions for R/G/B scanline beams in units - // of scanlines. 
Positive offsets go right/down; ranges [-2, 2] - static const float2 convergence_offsets_r_static = float2(0.1, 0.2); - static const float2 convergence_offsets_g_static = float2(0.3, 0.4); - static const float2 convergence_offsets_b_static = float2(0.5, 0.6); - // Detect interlacing (static option only for now)? - static const bool interlace_detect_static = true; - // Assume 1080-line sources are interlaced? - static const bool interlace_1080i_static = false; - // For interlaced sources, assume TFF (top-field first) or BFF order? - // (Whether this matters depends on the nature of the interlaced input.) - static const bool interlace_bff_static = false; - -// ANTIALIASING: - // What AA level do you want for curvature/overscan/subpixels? Options: - // 0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x - // (Static option only for now) - static const float aa_level = 12.0; // range [0, 24] - // What antialiasing filter do you want (static option only)? Options: - // 0: Box (separable), 1: Box (cylindrical), - // 2: Tent (separable), 3: Tent (cylindrical), - // 4: Gaussian (separable), 5: Gaussian (cylindrical), - // 6: Cubic* (separable), 7: Cubic* (cylindrical, poor) - // 8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor) - // * = Especially slow with RUNTIME_ANTIALIAS_WEIGHTS - static const float aa_filter = 6.0; // range [0, 9] - // Flip the sample grid on odd/even frames (static option only for now)? - static const bool aa_temporal = false; - // Use RGB subpixel offsets for antialiasing? The pixel is at green, and - // the blue offset is the negative r offset; range [0, 0.5] - static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0); - // Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell - // 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality. - // 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening. - // 3.) C = 1.0/3.0 is the Mitchell-Netravali filter. 
- // 4.) C = 0.0 is a soft spline filter. - static const float aa_cubic_c_static = 0.5; // range [0, 4] - // Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter. - static const float aa_gauss_sigma_static = 0.5; // range [0.0625, 1.0] - -// PHOSPHOR MASK: - // Mask type: 0 = aperture grille, 1 = slot mask, 2 = EDP shadow mask - static const float mask_type_static = 1.0; // range [0, 2] - // We can sample the mask three ways. Pick 2/3 from: Pretty/Fast/Flexible. - // 0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible). - // This requires PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined. - // 1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible). This - // is halfway decent with LUT mipmapping but atrocious without it. - // 2.) Tile it without resizing at a 1:1 texel:pixel ratio for flat coords - // (pretty/fast/inflexible). Each input LUT has a fixed dot pitch. - // This mode reuses the same masks, so triads will be enormous unless - // you change the mask LUT filenames in your .cgp file. - static const float mask_sample_mode_static = 0.0; // range [0, 2] - // Prefer setting the triad size (0.0) or number on the screen (1.0)? - // If RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size - // will always be used to calculate the full bloom sigma statically. - static const float mask_specify_num_triads_static = 0.0; // range [0, 1] - // Specify the phosphor triad size, in pixels. Each tile (usually with 8 - // triads) will be rounded to the nearest integer tile size and clamped to - // obey minimum size constraints (imposed to reduce downsize taps) and - // maximum size constraints (imposed to have a sane MASK_RESIZE FBO size). - // To increase the size limit, double the viewport-relative scales for the - // two MASK_RESIZE passes in crt-royale.cgp and user-cgp-contants.h. 
- // range [1, mask_texture_small_size/mask_triads_per_tile] - static const float mask_triad_size_desired_static = 24.0 / 8.0; - // If mask_specify_num_triads is 1.0/true, we'll go by this instead (the - // final size will be rounded and constrained as above); default 480.0 - static const float mask_num_triads_desired_static = 480.0; - // How many lobes should the sinc/Lanczos resizer use? More lobes require - // more samples and avoid moire a bit better, but some is unavoidable - // depending on the destination size (static option for now). - static const float mask_sinc_lobes = 3.0; // range [2, 4] - // The mask is resized using a variable number of taps in each dimension, - // but some Cg profiles always fetch a constant number of taps no matter - // what (no dynamic branching). We can limit the maximum number of taps if - // we statically limit the minimum phosphor triad size. Larger values are - // faster, but the limit IS enforced (static option only, forever); - // range [1, mask_texture_small_size/mask_triads_per_tile] - // TODO: Make this 1.0 and compensate with smarter sampling! - static const float mask_min_allowed_triad_size = 2.0; - -// GEOMETRY: - // Geometry mode: - // 0: Off (default), 1: Spherical mapping (like cgwg's), - // 2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron - static const float geom_mode_static = 0.0; // range [0, 3] - // Radius of curvature: Measured in units of your viewport's diagonal size. - static const float geom_radius_static = 2.0; // range [1/(2*pi), 1024] - // View dist is the distance from the player to their physical screen, in - // units of the viewport's diagonal size. It controls the field of view. 
- static const float geom_view_dist_static = 2.0; // range [0.5, 1024] - // Tilt angle in radians (clockwise around up and right vectors): - static const float2 geom_tilt_angle_static = float2(0.0, 0.0); // range [-pi, pi] - // Aspect ratio: When the true viewport size is unknown, this value is used - // to help convert between the phosphor triad size and count, along with - // the mask_resize_viewport_scale constant from user-cgp-constants.h. Set - // this equal to Retroarch's display aspect ratio (DAR) for best results; - // range [1, geom_max_aspect_ratio from user-cgp-constants.h]; - // default (256/224)*(54/47) = 1.313069909 (see below) - static const float geom_aspect_ratio_static = 1.313069909; - // Before getting into overscan, here's some general aspect ratio info: - // - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting - // - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR - // - PAR = pixel aspect ratio = DAR / SAR; holds regardless of cropping - // Geometry processing has to "undo" the screen-space 2D DAR to calculate - // 3D view vectors, then reapplies the aspect ratio to the simulated CRT in - // uv-space. To ensure the source SAR is intended for a ~4:3 DAR, either: - // a.) Enable Retroarch's "Crop Overscan" - // b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0) - // Real consoles use horizontal black padding in the signal, but emulators - // often crop this without cropping the vertical padding; a 256x224 [S]NES - // frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not. - // The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun: - // http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50 - // http://forums.nesdev.com/viewtopic.php?p=24815#p24815 - // For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR - // without doing a. 
or b., but horizontal image borders will be tighter - // than vertical ones, messing up curvature and overscan. Fixing the - // padding first corrects this. - // Overscan: Amount to "zoom in" before cropping. You can zoom uniformly - // or adjust x/y independently to e.g. readd horizontal padding, as noted - // above: Values < 1.0 zoom out; range (0, inf) - static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0) - // Compute a proper pixel-space to texture-space matrix even without ddx()/ - // ddy()? This is ~8.5% slower but improves antialiasing/subpixel filtering - // with strong curvature (static option only for now). - static const bool geom_force_correct_tangent_matrix = true; - -// BORDERS: - // Rounded border size in texture uv coords: - static const float border_size_static = 0.015; // range [0, 0.5] - // Border darkness: Moderate values darken the border smoothly, and high - // values make the image very dark just inside the border: - static const float border_darkness_static = 2.0; // range [0, inf) - // Border compression: High numbers compress border transitions, narrowing - // the dark border area. 
- static const float border_compress_static = 2.5; // range [1, inf) - - -#endif // USER_SETTINGS_H - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh deleted file mode 100644 index 31031bfed..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh +++ /dev/null @@ -1,97 +0,0 @@ -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// PASS SETTINGS: -// gamma-management.h needs to know what kind of pipeline we're using and -// what pass this is in that pipeline. This will become obsolete if/when we -// can #define things like this in the .cgp preset file. 
-//#define GAMMA_ENCODE_EVERY_FBO -//#define FIRST_PASS -//#define LAST_PASS -//#define SIMULATE_CRT_ON_LCD -//#define SIMULATE_GBA_ON_LCD -//#define SIMULATE_LCD_ON_CRT -//#define SIMULATE_GBA_ON_CRT - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -// #included by vertex shader: -#include "../include/gamma-management.fxh" -#include "../include/blur-functions.fxh" - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p4 -{ - float2 blur_dxdy : TEXCOORD1; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - - -// Vertex shader generating a triangle covering the entire screen -void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - -/* float2 texture_size = 1.0/NormalizedNativePixelSize; - float2 output_size = (ViewportSize*BufferToViewportRatio); - float2 video_size = 1.0/NormalizedNativePixelSize; -*/ -// float2 texture_size = float2(320.0, 240.0); - float2 texture_size = HALATION_BLUR_texture_size; - float2 output_size = VIEWPORT_SIZE; -// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0); - // float2 output_size = float2(320.0, 240.0); -// float2 output_size = 1.0/NormalizedNativePixelSize; - - // Get the uv sample distance between output pixels. Blurs are not generic - // Gaussian resizers, and correct blurs require: - // 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0. - // 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0 - // 3.) 
filter_linearN = "true" except for 1x scale nearest neighbor blurs - // Gaussian resizers would upsize using the distance between input texels - // (not output pixels), but we avoid this and consistently blur at the - // destination size. Otherwise, combining statically calculated weights - // with bilinear sample exploitation would result in terrible artifacts. - const float2 dxdy_scale = video_size/output_size; - const float2 dxdy = dxdy_scale/texture_size; - // This blur is horizontal-only, so zero out the vertical offset: - OUT.blur_dxdy = float2(dxdy.x, 0.0); -} - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target -{ - float3 color = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy); - // Encode and output the blurred image: - return encode_output(float4(color, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh deleted file mode 100644 index 55605ecd7..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh +++ /dev/null @@ -1,95 +0,0 @@ -///////////////////////////////// MIT LICENSE //////////////////////////////// - -// Copyright (C) 2014 TroggleMonkey -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial 
portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// PASS SETTINGS: -// gamma-management.h needs to know what kind of pipeline we're using and -// what pass this is in that pipeline. This will become obsolete if/when we -// can #define things like this in the .cgp preset file. -//#define GAMMA_ENCODE_EVERY_FBO -//#define FIRST_PASS -//#define LAST_PASS -//#define SIMULATE_CRT_ON_LCD -//#define SIMULATE_GBA_ON_LCD -//#define SIMULATE_LCD_ON_CRT -//#define SIMULATE_GBA_ON_CRT - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/gamma-management.fxh" -#include "../include/blur-functions.fxh" - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p3 -{ - float2 blur_dxdy : TEXCOORD1; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - - -// Vertex shader generating a triangle covering the entire screen -void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); -/* - float2 texture_size = 1.0/NormalizedNativePixelSize; - float2 output_size = (ViewportSize*BufferToViewportRatio); - float2 video_size = 1.0/NormalizedNativePixelSize; -*/ -// float2 texture_size = float2(320.0, 240.0); - float2 texture_size = BLUR9FAST_VERTICAL_texture_size; - float2 output_size = VIEWPORT_SIZE; - // float2 output_size = VIEWPORT_SIZE/4.0; -// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0); -// float2 output_size = 1.0/NormalizedNativePixelSize; - - // Get the uv sample distance between output pixels. Blurs are not generic - // Gaussian resizers, and correct blurs require: - // 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0. - // 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0 - // 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs - // Gaussian resizers would upsize using the distance between input texels - // (not output pixels), but we avoid this and consistently blur at the - // destination size. Otherwise, combining statically calculated weights - // with bilinear sample exploitation would result in terrible artifacts. 
- const float2 dxdy_scale = video_size/output_size; - const float2 dxdy = dxdy_scale/texture_size; - // This blur is vertical-only, so zero out the horizontal offset: - OUT.blur_dxdy = float2(0.0, dxdy.y); -} - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target -{ - float3 color = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy); - // Encode and output the blurred image: - return encode_output(float4(color, 1.0)); -} diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh deleted file mode 100644 index 13bb3e0ae..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh +++ /dev/null @@ -1,363 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#define ORIG_LINEARIZEDvideo_size VERTICAL_SCANLINES_texture_size -#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size - -#define bloom_approx_scale_x (4.0/3.0) -static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0); - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" -#include "../include/gamma-management.fxh" -#include "../include/blur-functions.fxh" -#include "../include/scanline-functions.fxh" -#include "../include/bloom-functions.fxh" - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv, - const float2 dxdy, const float2 texture_size, const float2 texture_size_inv, - const float2 tex_uv_to_pixel_scale, const float sigma) -{ - // Requires: 1.) All requirements of gamma-management.h must be satisfied! - // 2.) filter_linearN must == "true" in your .cgp preset. - // 3.) mipmap_inputN must == "true" in your .cgp preset if - // IN.output_size << SRC.video_size. - // 4.) dxdy should contain the uv pixel spacing: - // dxdy = max(float2(1.0), - // SRC.video_size/IN.output_size)/SRC.texture_size; - // 5.) texture_size == SRC.texture_size - // 6.) texture_size_inv == float2(1.0)/SRC.texture_size - // 7.) tex_uv_to_pixel_scale == IN.output_size * - // SRC.texture_size / SRC.video_size; - // 8.) sigma is the desired Gaussian standard deviation, in - // terms of output pixels. It should be < ~0.66171875 to - // ensure the first unused sample (outside the 4x4 box) has - // a weight < 1.0/256.0. - // Returns: A true 4x4 Gaussian resize of the input. 
- // Description: - // Given correct inputs, this Gaussian resizer samples 4 pixel locations - // along each downsized dimension and/or 4 texel locations along each - // upsized dimension. It computes dynamic weights based on the pixel-space - // distance of each sample from the destination pixel. It is arbitrarily - // resizable and higher quality than tex2Dblur3x3_resize, but it's slower. - // TODO: Move this to a more suitable file once there are others like it. - const float denom_inv = 0.5/(sigma*sigma); - // We're taking 4x4 samples, and we're snapping to texels for upsizing. - // Find texture coords for sample 5 (second row, second column): - const float2 curr_texel = tex_uv * texture_size; - const float2 prev_texel = - floor(curr_texel - under_half.xx) + 0.5.xx; - const float2 prev_texel_uv = prev_texel * texture_size_inv; - const float2 snap = float2(dxdy <= texture_size_inv); - const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy; - const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap); - // Compute texture coords for other samples: - const float2 dx = float2(dxdy.x, 0.0); - const float2 sample0_uv = sample5_uv - dxdy; - const float2 sample10_uv = sample5_uv + dxdy; - const float2 sample15_uv = sample5_uv + 2.0 * dxdy; - const float2 sample1_uv = sample0_uv + dx; - const float2 sample2_uv = sample0_uv + 2.0 * dx; - const float2 sample3_uv = sample0_uv + 3.0 * dx; - const float2 sample4_uv = sample5_uv - dx; - const float2 sample6_uv = sample5_uv + dx; - const float2 sample7_uv = sample5_uv + 2.0 * dx; - const float2 sample8_uv = sample10_uv - 2.0 * dx; - const float2 sample9_uv = sample10_uv - dx; - const float2 sample11_uv = sample10_uv + dx; - const float2 sample12_uv = sample15_uv - 3.0 * dx; - const float2 sample13_uv = sample15_uv - 2.0 * dx; - const float2 sample14_uv = sample15_uv - dx; - // Load each sample: - const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb; - const float3 sample1 = tex2D_linearize(tex, 
sample1_uv).rgb; - const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb; - const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb; - const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb; - const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb; - const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb; - const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb; - const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb; - const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb; - const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb; - const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb; - const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb; - const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb; - const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb; - const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb; - // Compute destination pixel offsets for each sample: - const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale; - const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - 
dest_pixel; - const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel; - const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel; - // Compute Gaussian sample weights: - const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv); - const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv); - const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv); - const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv); - const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv); - const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv); - const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv); - const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv); - const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv); - const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv); - const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv); - const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv); - const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv); - const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv); - const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv); - const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv); - const float weight_sum_inv = 1.0/( - w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 + - w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15); - // Weight and sum the samples: - const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + - w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + - w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + - w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15; - return sum * weight_sum_inv; -} - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p2 -{ - 
float2 tex_uv : TEXCOORD1; - float2 blur_dxdy : TEXCOORD2; - float2 uv_scanline_step : TEXCOORD3; - float estimated_viewport_size_x : TEXCOORD4; - float2 texture_size_inv : TEXCOORD5; - float2 tex_uv_to_pixel_scale : TEXCOORD6; - float2 output_size : TEXCOORD7; -}; - - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 texture_size = BLOOM_APPROX_texture_size; - float2 output_size = VIEWPORT_SIZE; - - OUT.output_size = output_size; - - // This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h, - // except we're using a different source image. - const float2 video_uv = texcoord * texture_size/video_size; - OUT.tex_uv = video_uv * ORIG_LINEARIZEDvideo_size / - ORIG_LINEARIZEDtexture_size; - // The last pass (vertical scanlines) had a viewport y scale, so we can - // use it to calculate a better runtime sigma: -// OUT.estimated_viewport_size_x = video_size.y * geom_aspect_ratio_x/geom_aspect_ratio_y; - OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y; - - // Get the uv sample distance between output pixels. We're using a resize - // blur, so arbitrary upsizing will be acceptable if filter_linearN = - // "true," and arbitrary downsizing will be acceptable if mipmap_inputN = - // "true" too. The blur will be much more accurate if a true 4x4 Gaussian - // resize is used instead of tex2Dblur3x3_resize (which samples between - // texels even for upsizing). 
- const float2 dxdy_min_scale = ORIG_LINEARIZEDvideo_size/output_size; - const float2 texture_size_inv = 1.0.xx/ORIG_LINEARIZEDtexture_size; - if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize - { - // For upsizing, we'll snap to texels and sample the nearest 4. - const float2 dxdy_scale = max(dxdy_min_scale, 1.0.xx); - OUT.blur_dxdy = dxdy_scale * texture_size_inv; - } - else - { - const float2 dxdy_scale = dxdy_min_scale; - OUT.blur_dxdy = dxdy_scale * texture_size_inv; - } - // tex2Dresize_gaussian4x4 needs to know a bit more than the other filters: - OUT.tex_uv_to_pixel_scale = output_size * - ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size; - OUT.texture_size_inv = texture_size_inv; - - // Detecting interlacing again here lets us apply convergence offsets in - // this pass. il_step_multiple contains the (texel, scanline) step - // multiple: 1 for progressive, 2 for interlaced. - const float2 orig_video_size = ORIG_LINEARIZEDvideo_size; - const float y_step = 1.0 + float(is_interlaced(orig_video_size.y)); - const float2 il_step_multiple = float2(1.0, y_step); - // Get the uv distance between (texels, same-field scanlines): - OUT.uv_scanline_step = il_step_multiple / ORIG_LINEARIZEDtexture_size; -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target -{ - // Would a viewport-relative size work better for this pass? (No.) - // PROS: - // 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd - // write a viewport scale. That number could be used to directly scale - // the viewport-resolution bloom sigma and/or triad size to a smaller - // scale. This way, we could calculate an optimal dynamic sigma no - // matter how the dot pitch is specified. - // CONS: - // 1.) 
Texel smearing would be much worse at small viewport sizes, but - // performance would be much worse at large viewport sizes, so there - // would be no easy way to calculate a decent scale. - // 2.) Worse, we could no longer get away with using a constant-size blur! - // Instead, we'd have to face all the same difficulties as the real - // phosphor bloom, which requires static #ifdefs to decide the blur - // size based on the expected triad size...a dynamic value. - // 3.) Like the phosphor bloom, we'd have less control over making the blur - // size correct for an optical blur. That said, we likely overblur (to - // maintain brightness) more than the eye would do by itself: 20/20 - // human vision distinguishes ~1 arc minute, or 1/60 of a degree. The - // highest viewing angle recommendation I know of is THX's 40.04 degree - // recommendation, at which 20/20 vision can distinguish about 2402.4 - // lines. Assuming the "TV lines" definition, that means 1201.2 - // distinct light lines and 1201.2 distinct dark lines can be told - // apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2 - // pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total - // (if they're alternately lit). That's a max of 800.8 triads. Using - // a more popular 30 degree viewing angle recommendation, 20/20 vision - // can distinguish 1800 lines, or 600 triads of alternately lit - // phosphors. In contrast, we currently blur phosphors all the way - // down to 341.3 triads to ensure full brightness. - // 4.) Realistically speaking, we're usually just going to use bilinear - // filtering in this pass anyway, but it only works well to limit - // bandwidth if it's done at a small constant scale. 
- - // Get the constants we need to sample: - float2 output_size = VAR.output_size; - //const sampler2D Source = ORIG_LINEARIZED; - const float2 tex_uv = VAR.tex_uv; - const float2 blur_dxdy = VAR.blur_dxdy; - const float2 texture_size = ORIG_LINEARIZEDtexture_size; - const float2 texture_size_inv = VAR.texture_size_inv; - const float2 tex_uv_to_pixel_scale = VAR.tex_uv_to_pixel_scale; - float2 tex_uv_r, tex_uv_g, tex_uv_b; - if(beam_misconvergence) - { - const float2 uv_scanline_step = VAR.uv_scanline_step; - const float2 convergence_offsets_r = get_convergence_offsets_r_vector(); - const float2 convergence_offsets_g = get_convergence_offsets_g_vector(); - const float2 convergence_offsets_b = get_convergence_offsets_b_vector(); - tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step; - tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step; - tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step; - } - // Get the blur sigma: - const float bloom_approx_sigma = get_bloom_approx_sigma(output_size.x, - VAR.estimated_viewport_size_x); - - // Sample the resized and blurred texture, and apply convergence offsets if - // necessary. Applying convergence offsets here triples our samples from - // 16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and - // HALATION_BLUR 3 times at full resolution every time they're used. - float3 color_r, color_g, color_b, color; - if(bloom_approx_filter > 1.5) - { - // Use a 4x4 Gaussian resize. This is slower but technically correct. 
- if(beam_misconvergence) - { - color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r, - blur_dxdy, texture_size, texture_size_inv, - tex_uv_to_pixel_scale, bloom_approx_sigma); - color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g, - blur_dxdy, texture_size, texture_size_inv, - tex_uv_to_pixel_scale, bloom_approx_sigma); - color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b, - blur_dxdy, texture_size, texture_size_inv, - tex_uv_to_pixel_scale, bloom_approx_sigma); - } - else - { - color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv, - blur_dxdy, texture_size, texture_size_inv, - tex_uv_to_pixel_scale, bloom_approx_sigma); - } - } - else if(bloom_approx_filter > 0.5) - { - // Use a 3x3 resize blur. This is the softest option, because we're - // blurring already blurry bilinear samples. It doesn't play quite as - // nicely with convergence offsets, but it has its charms. - if(beam_misconvergence) - { - color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r, - blur_dxdy, bloom_approx_sigma); - color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g, - blur_dxdy, bloom_approx_sigma); - color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b, - blur_dxdy, bloom_approx_sigma); - } - else - { - color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy); - } - } - else - { - // Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH - // better than tex2Dblur3x3_resize for the very small sigmas we're - // likely to use at small output resolutions. (This estimate becomes - // too sharp above ~400x300, but the blurs break down above that - // resolution too, unless min_allowed_viewport_triads is high enough to - // keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.) 
- if(beam_misconvergence) - { - color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb; - color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb; - color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb; - } - else - { - color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb; - } - } - // Pack the colors from the red/green/blue beams into a single vector: - if(beam_misconvergence) - { - color = float3(color_r.r, color_g.g, color_b.b); - } - // Encode and output the blurred image: - return encode_output(float4(color, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh deleted file mode 100644 index 681358573..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh +++ /dev/null @@ -1,129 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/gamma-management.fxh" -#include "../include/bloom-functions.fxh" -#include "../include/phosphor-mask-resizing.fxh" -#include "../include/scanline-functions.fxh" - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p10 -{ - float2 video_uv : TEXCOORD1; - float2 bloom_dxdy : TEXCOORD2; - float bloom_sigma_runtime : TEXCOORD3; - float2 sinangle : TEXCOORD4; - float2 cosangle : TEXCOORD5; - float3 stretch : TEXCOORD6; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 texture_size = BLOOM_HORIZONTAL_texture_size; - float2 output_size = VIEWPORT_SIZE; - - // Screen centering - texcoord = texcoord - float2(centerx,centery)/100.0; - - float2 tex_uv = texcoord; - - // Our various input textures use different coords: - const float2 video_uv = tex_uv * texture_size/video_size; - OUT.video_uv = video_uv; - - // We're horizontally blurring the bloom input (vertically blurred - // brightpass). 
Get the uv distance between output pixels / input texels - // in the horizontal direction (this pass must NOT resize): - OUT.bloom_dxdy = float2(1.0/texture_size.x, 0.0); - - // Calculate a runtime bloom_sigma in case it's needed: - const float mask_tile_size_x = get_resized_mask_tile_size( - output_size, output_size * mask_resize_viewport_scale, false).x; - OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad( - mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh); - - // Precalculate a bunch of useful values we'll need in the fragment - // shader. - OUT.sinangle = sin(float2(geom_x_tilt, geom_y_tilt)); - OUT.cosangle = cos(float2(geom_x_tilt, geom_y_tilt)); - OUT.stretch = maxscale(OUT.sinangle, OUT.cosangle); -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target -{ - VAR.video_uv = (geom_curvature == true) ? transform(VAR.video_uv, VAR.sinangle, VAR.cosangle, VAR.stretch) : VAR.video_uv; - - float cval = corner((VAR.video_uv-0.5.xx) * BufferToViewportRatio + 0.5.xx); - - // Blur the vertically blurred brightpass horizontally by 9/17/25/43x: - const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime); - const float3 blurred_brightpass = tex2DblurNfast(BLOOM_VERTICAL, - VAR.video_uv, VAR.bloom_dxdy, bloom_sigma); - - // Sample the masked scanlines. 
Alpha contains the auto-dim factor: - const float3 intensity_dim = - tex2D_linearize(MASKED_SCANLINES, VAR.video_uv).rgb; - const float auto_dim_factor = levels_autodim_temp; - const float undim_factor = 1.0/auto_dim_factor; - - // Calculate the mask dimpass, add it to the blurred brightpass, and - // undim (from scanline auto-dim) and amplify (from mask dim) the result: - const float mask_amplify = get_mask_amplify(); - const float3 brightpass = tex2D_linearize(BRIGHTPASS, - VAR.video_uv).rgb; - const float3 dimpass = intensity_dim - brightpass; - const float3 phosphor_bloom = (dimpass + blurred_brightpass) * - mask_amplify * undim_factor * levels_contrast; - - // Sample the halation texture, and let some light bleed into refractive - // diffusion. Conceptually this occurs before the phosphor bloom, but - // adding it in earlier passes causes black crush in the diffusion colors. - const float3 diffusion_color = levels_contrast * tex2D_linearize( - HALATION_BLUR, VAR.video_uv).rgb; - float3 final_bloom = lerp(phosphor_bloom, - diffusion_color, diffusion_weight); - - final_bloom = (geom_curvature == true) ? final_bloom * cval.xxx : final_bloom; - - final_bloom = pow(final_bloom.rgb, 1.0/get_output_gamma()); - - // Encode and output the bloomed image: - return encode_output(float4(final_bloom, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh deleted file mode 100644 index 4638ff2c1..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. 
-// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/gamma-management.fxh" -#include "../include/bloom-functions.fxh" -#include "../include/phosphor-mask-resizing.fxh" - - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p9 -{ - float2 tex_uv : TEXCOORD1; - float2 bloom_dxdy : TEXCOORD2; - float bloom_sigma_runtime : TEXCOORD3; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 texture_size = BLOOM_VERTICAL_texture_size; - float2 output_size = VIEWPORT_SIZE; - - OUT.tex_uv = texcoord; - - // Get the uv sample distance between output pixels. Calculate dxdy like - // blurs/vertex-shader-blur-fast-vertical.h. - const float2 dxdy_scale = video_size/output_size; - const float2 dxdy = dxdy_scale/texture_size; - // This blur is vertical-only, so zero out the vertical offset: - OUT.bloom_dxdy = float2(0.0, dxdy.y); - - // Calculate a runtime bloom_sigma in case it's needed: - const float mask_tile_size_x = get_resized_mask_tile_size( - output_size, output_size * mask_resize_viewport_scale, false).x; - OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad( - mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh); -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target -{ - // Blur the brightpass horizontally with a 9/17/25/43x blur: - const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime); - const float3 color = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv, - VAR.bloom_dxdy, bloom_sigma); - // Encode and output the blurred image: - return encode_output(float4(color, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh deleted file mode 100644 index f66083bb4..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh +++ /dev/null @@ -1,130 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. 
-// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/gamma-management.fxh" -#include "../include/blur-functions.fxh" -#include "../include/phosphor-mask-resizing.fxh" -#include "../include/scanline-functions.fxh" -#include "../include/bloom-functions.fxh" - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p8 -{ - float2 video_uv : TEXCOORD1; - float2 scanline_tex_uv : TEXCOORD2; - float2 blur3x3_tex_uv : TEXCOORD3; - float bloom_sigma_runtime : TEXCOORD4; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 tex_uv = texcoord; - - float2 texture_size = BRIGHTPASS_texture_size; - float2 output_size = VIEWPORT_SIZE; - - // Our various input textures use different coords: - const float2 video_uv = tex_uv * texture_size/video_size; - OUT.video_uv = video_uv; - OUT.scanline_tex_uv = video_uv * MASKED_SCANLINES_video_size / - MASKED_SCANLINES_texture_size; - OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size; - - // Calculate a runtime bloom_sigma in case it's needed: - const float mask_tile_size_x = get_resized_mask_tile_size( - output_size, output_size * mask_resize_viewport_scale, false).x; - OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad( - mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh); -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target -{ - // Sample the masked scanlines: - const float3 intensity_dim = - tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb; - // Get the full intensity, including auto-undimming, and mask compensation: - const float auto_dim_factor = levels_autodim_temp; - const float undim_factor = 1.0/auto_dim_factor; - const float mask_amplify = get_mask_amplify(); - const float3 intensity = intensity_dim * undim_factor * mask_amplify * - levels_contrast; - - // Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines - // would look like, so we can estimate how much energy we'll receive from - // blooming neighbors: - const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize( - BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb; - - // Compute the blur weight for the center texel and the maximum energy we - // expect to receive from neighbors: - const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime); - const 
float center_weight = get_center_weight(bloom_sigma); - const float3 max_area_contribution_approx = - max(0.0.xxx, phosphor_blur_approx - center_weight * intensity); - // Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0), - // because it actually gets better results (on top of being very simple), - // but adjust all intensities for the user's desired underestimate factor: - const float3 area_contrib_underestimate = - bloom_underestimate_levels * max_area_contribution_approx; - const float3 intensity_underestimate = - bloom_underestimate_levels * intensity; - // Calculate the blur_ratio, the ratio of intensity we want to blur: - #ifdef BRIGHTPASS_AREA_BASED - // This area-based version changes blur_ratio more smoothly and blurs - // more, clipping less but offering less phosphor differentiation: - const float3 phosphor_blur_underestimate = bloom_underestimate_levels * - phosphor_blur_approx; - const float3 soft_intensity = max(intensity_underestimate, - phosphor_blur_underestimate * mask_amplify); - const float3 blur_ratio_temp = - ((1.0.xxx - area_contrib_underestimate) / - soft_intensity - 1.0.xxx) / (center_weight - 1.0); - #else - const float3 blur_ratio_temp = - ((1.0.xxx - area_contrib_underestimate) / - intensity_underestimate - 1.0.xxx) / (center_weight - 1.0); - #endif - const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0); - // Calculate the brightpass based on the auto-dimmed, unamplified, masked - // scanlines, encode if necessary, and return! 
- const float3 brightpass = intensity_dim * - lerp(blur_ratio, 1.0.xxx, bloom_excess); - return encode_output(float4(brightpass, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh deleted file mode 100644 index a2dc129dd..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh +++ /dev/null @@ -1,109 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -// PASS SETTINGS: -// gamma-management.h needs to know what kind of pipeline we're using and -// what pass this is in that pipeline. This will become obsolete if/when we -// can #define things like this in the .cgp preset file. 
-#define FIRST_PASS -#define SIMULATE_CRT_ON_LCD - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/bind-shader-params.fxh" -#include "../include/gamma-management.fxh" -#include "../include/scanline-functions.fxh" - - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex -{ - float2 tex_uv : TEXCOORD1; - float2 uv_step : TEXCOORD2; - float interlaced : TEXCOORD3; -}; - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - OUT.tex_uv = texcoord; -// OUT.tex_uv = (floor(texcoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize; - // Save the uv distance between texels: - OUT.uv_step = NormalizedNativePixelSize; - - // Detect interlacing: 1.0 = true, 0.0 = false. - OUT.interlaced = is_interlaced(1.0/NormalizedNativePixelSize.y); -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;}; - -#define input_texture sBackBuffer - -float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target -{ - // Linearize the input based on CRT gamma and bob interlaced fields. - // Bobbing ensures we can immediately blur without getting artifacts. - // Note: TFF/BFF won't matter for sources that double-weave or similar. 
- // VAR.tex_uv = (floor(VAR.tex_uv / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize; - - if(interlace_detect) - { - // Sample the current line and an average of the previous/next line; - // tex2D_linearize will decode CRT gamma. Don't bother branching: - const float2 tex_uv = VAR.tex_uv; - const float2 v_step = float2(0.0, VAR.uv_step.y); - const float3 curr_line = tex2D_linearize_first( - input_texture, tex_uv).rgb; - const float3 last_line = tex2D_linearize_first( - input_texture, tex_uv - v_step).rgb; - const float3 next_line = tex2D_linearize_first( - input_texture, tex_uv + v_step).rgb; - const float3 interpolated_line = 0.5 * (last_line + next_line); - // If we're interlacing, determine which field curr_line is in: - const float modulus = VAR.interlaced + 1.0; - const float field_offset = - fmod(FrameCount + float(interlace_bff), modulus); - const float curr_line_texel = tex_uv.y / NormalizedNativePixelSize.y; - // Use under_half to fix a rounding bug around exact texel locations. - const float line_num_last = floor(curr_line_texel - under_half); - const float wrong_field = fmod(line_num_last + field_offset, modulus); - // Select the correct color, and output the result: - const float3 color = lerp(curr_line, interpolated_line, wrong_field); - return encode_output(float4(color, 1.0)); - } - else - { - return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv)); - } -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh deleted file mode 100644 index 8e779a496..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh +++ /dev/null @@ -1,130 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. 
-// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/phosphor-mask-resizing.fxh" - - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p6 -{ - float2 src_tex_uv_wrap : TEXCOORD1; - float2 tile_uv_wrap : TEXCOORD2; - float2 resize_magnification_scale : TEXCOORD3; - float2 src_dxdy : TEXCOORD4; - float2 tile_size_uv : TEXCOORD5; - float2 input_tiles_per_texture : TEXCOORD6; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 tex_uv = texcoord; - - float2 texture_size = MASK_RESIZE_texture_size; - float2 output_size = 0.0625*(VIEWPORT_SIZE); - - // First estimate the viewport size (the user will get the wrong number of - // triads if it's wrong and mask_specify_num_triads is 1.0/true). - const float2 estimated_viewport_size = - output_size / mask_resize_viewport_scale; - // Find the final size of our resized phosphor mask tiles. We probably - // estimated the viewport size and MASK_RESIZE output size differently last - // pass, so do not swear they were the same. ;) - const float2 mask_resize_tile_size = get_resized_mask_tile_size( - estimated_viewport_size, output_size, false); - - // We'll render resized tiles until filling the output FBO or meeting a - // limit, so compute [wrapped] tile uv coords based on the output uv coords - // and the number of tiles that will fit in the FBO. - const float2 output_tiles_this_pass = output_size / mask_resize_tile_size; - const float2 output_video_uv = tex_uv * texture_size / video_size; - const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass; - - // Get the texel size of an input tile and related values: - const float2 input_tile_size = float2(min( - mask_resize_src_lut_size.x, video_size.x), mask_resize_tile_size.y); - const float2 tile_size_uv = input_tile_size / texture_size; - const float2 input_tiles_per_texture = texture_size / input_tile_size; - - // Derive [wrapped] texture uv coords from [wrapped] tile uv coords and - // the tile size in uv coords, and save frac() for the fragment shader. 
- const float2 src_tex_uv_wrap = tile_uv_wrap * tile_size_uv; - - // Output the values we need, including the magnification scale and step: - OUT.tile_uv_wrap = tile_uv_wrap; - OUT.src_tex_uv_wrap = src_tex_uv_wrap; - OUT.resize_magnification_scale = mask_resize_tile_size / input_tile_size; - OUT.src_dxdy = float2(1.0/texture_size.x, 0.0); - OUT.tile_size_uv = tile_size_uv; - OUT.input_tiles_per_texture = input_tiles_per_texture; -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target -{ - // The input contains one mask tile horizontally and a number vertically. - // Resize the tile horizontally to its final screen size and repeat it - // until drawing at least mask_resize_num_tiles, leaving it unchanged - // vertically. Lanczos-resizing the phosphor mask achieves much sharper - // results than mipmapping, outputting >= mask_resize_num_tiles makes for - // easier tiled sampling later. - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - // Discard unneeded fragments in case our profile allows real branches. 
- float2 texture_size = MASK_RESIZE_texture_size; - const float2 tile_uv_wrap = VAR.tile_uv_wrap; - if(get_mask_sample_mode() < 0.5 && - max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles) - { - const float src_dx = VAR.src_dxdy.x; - const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap); - const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL, - src_tex_uv, texture_size, VAR.src_dxdy.x, - VAR.resize_magnification_scale.x, VAR.tile_size_uv.x); - // The input LUT was linear RGB, and so is our output: - return float4(pixel_color, 1.0); - } - else - { - discard; - } - #else - discard; - return 1.0.xxxx; - #endif -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh deleted file mode 100644 index fc4d46fc7..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh +++ /dev/null @@ -1,164 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/phosphor-mask-resizing.fxh" - - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p5 -{ - float2 src_tex_uv_wrap : TEXCOORD1; - float2 resize_magnification_scale : TEXCOORD2; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 tex_uv = texcoord; - - float2 texture_size = MASK_RESIZE_VERT_texture_size; - float2 output_size = float2(64.0, 0.0625*((VIEWPORT_SIZE).y)); - - // First estimate the viewport size (the user will get the wrong number of - // triads if it's wrong and mask_specify_num_triads is 1.0/true). - const float viewport_y = output_size.y / mask_resize_viewport_scale.y; -// Now get aspect_ratio from texture_size. -// const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y; - const float aspect_ratio = texture_size.x / texture_size.y; - const float2 estimated_viewport_size = - float2(viewport_y * aspect_ratio, viewport_y); - // Estimate the output size of MASK_RESIZE (the next pass). 
The estimated - // x component shouldn't matter, because we're not using the x result, and - // we're not swearing it's correct (if we did, the x result would influence - // the y result to maintain the tile aspect ratio). - const float2 estimated_mask_resize_output_size = - float2(output_size.y * aspect_ratio, output_size.y); - // Find the final intended [y] size of our resized phosphor mask tiles, - // then the tile size for the current pass (resize y only): - const float2 mask_resize_tile_size = get_resized_mask_tile_size( - estimated_viewport_size, estimated_mask_resize_output_size, false); - const float2 pass_output_tile_size = float2(min( - mask_resize_src_lut_size.x, output_size.x), mask_resize_tile_size.y); - - // We'll render resized tiles until filling the output FBO or meeting a - // limit, so compute [wrapped] tile uv coords based on the output uv coords - // and the number of tiles that will fit in the FBO. - const float2 output_tiles_this_pass = output_size / pass_output_tile_size; - const float2 output_video_uv = tex_uv * texture_size / video_size; - const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass; - - // The input LUT is just a single mask tile, so texture uv coords are the - // same as tile uv coords (save frac() for the fragment shader). The - // magnification scale is also straightforward: - OUT.src_tex_uv_wrap = tile_uv_wrap; - OUT.resize_magnification_scale = - pass_output_tile_size / mask_resize_src_lut_size; -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target -{ - // Resize the input phosphor mask tile to the final vertical size it will - // appear on screen. Keep 1x horizontal size if possible (IN.output_size - // >= mask_resize_src_lut_size), and otherwise linearly sample horizontally - // to fit exactly one tile. 
Lanczos-resizing the phosphor mask achieves - // much sharper results than mipmapping, and vertically resizing first - // minimizes the total number of taps required. We output a number of - // resized tiles >= mask_resize_num_tiles for easier tiled sampling later. - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - // Discard unneeded fragments in case our profile allows real branches. - const float2 tile_uv_wrap = VAR.src_tex_uv_wrap; - if(get_mask_sample_mode() < 0.5 && - tile_uv_wrap.y <= mask_resize_num_tiles) - { - static const float src_dy = 1.0/mask_resize_src_lut_size.y; - const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap); - float3 pixel_color; - // If mask_type is static, this branch will be resolved statically. - #ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT - if(mask_type < 0.5) - { - pixel_color = downsample_vertical_sinc_tiled( - mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - else if(mask_type < 1.5) - { - pixel_color = downsample_vertical_sinc_tiled( - mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - else - { - pixel_color = downsample_vertical_sinc_tiled( - mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - #else - if(mask_type < 0.5) - { - pixel_color = downsample_vertical_sinc_tiled( - mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - else if(mask_type < 1.5) - { - pixel_color = downsample_vertical_sinc_tiled( - mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - else - { - pixel_color = downsample_vertical_sinc_tiled( - mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size, - src_dy, VAR.resize_magnification_scale.y, 1.0); - } - #endif - // The input LUT was linear RGB, and so is our output: - return 
float4(pixel_color, 1.0); - } - else - { - discard; - } - #else - discard; - return 1.0.xxxx; - #endif -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh deleted file mode 100644 index a247f49a6..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh +++ /dev/null @@ -1,283 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - - -///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// - -#include "../include/user-settings.fxh" -#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" - -////////////////////////////////// INCLUDES ////////////////////////////////// - -#include "../include/scanline-functions.fxh" -#include "../include/phosphor-mask-resizing.fxh" -#include "../include/bloom-functions.fxh" -#include "../include/gamma-management.fxh" - - -/////////////////////////////////// HELPERS ////////////////////////////////// - -float4 tex2Dtiled_mask_linearize(const sampler2D tex, - const float2 tex_uv) -{ - // If we're manually tiling a texture, anisotropic filtering can get - // confused. One workaround is to just select the lowest mip level: - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD - // TODO: Use tex2Dlod_linearize with a calculated mip level. - return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0)); - #else - #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS - return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0)); - #else - return tex2D_linearize(tex, tex_uv); - #endif - #endif - #else - return tex2D_linearize(tex, tex_uv); - #endif -} - - -///////////////////////////////// STRUCTURES ///////////////////////////////// - - -struct out_vertex_p7 -{ - // Use explicit semantics so COLORx doesn't clamp values outside [0, 1]. 
- float2 video_uv : TEXCOORD1; - float2 scanline_tex_uv : TEXCOORD2; - float2 blur3x3_tex_uv : TEXCOORD3; - float2 halation_tex_uv : TEXCOORD4; - float2 scanline_texture_size_inv : TEXCOORD5; - float4 mask_tile_start_uv_and_size : TEXCOORD6; - float2 mask_tiles_per_screen : TEXCOORD7; -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - - -// Vertex shader generating a triangle covering the entire screen -void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 tex_uv = texcoord; - - float2 texture_size = MASKED_SCANLINES_texture_size; - float2 output_size = VIEWPORT_SIZE; - - // Our various input textures use different coords. - const float2 video_uv = tex_uv * texture_size/video_size; - const float2 scanline_texture_size_inv = - 1.0.xx/VERTICAL_SCANLINES_texture_size; - OUT.video_uv = video_uv; - OUT.scanline_tex_uv = video_uv * VERTICAL_SCANLINES_video_size * - scanline_texture_size_inv; - OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size / - BLOOM_APPROX_texture_size; - OUT.halation_tex_uv = video_uv * HALATION_BLUR_video_size / - HALATION_BLUR_texture_size; - OUT.scanline_texture_size_inv = scanline_texture_size_inv; - - // Get a consistent name for the final mask texture size. Sample mode 0 - // uses the manually resized mask, but ignore it if we never resized. - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - const float mask_sample_mode = get_mask_sample_mode(); - const float2 mask_resize_texture_size = mask_sample_mode < 0.5 ? - MASKED_SCANLINES_texture_size : mask_texture_large_size; - const float2 mask_resize_video_size = mask_sample_mode < 0.5 ? 
- MASKED_SCANLINES_video_size : mask_texture_large_size; - #else - const float2 mask_resize_texture_size = mask_texture_large_size; - const float2 mask_resize_video_size = mask_texture_large_size; - #endif - // Compute mask tile dimensions, starting points, etc.: - float2 mask_tiles_per_screen; - OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters( - mask_resize_texture_size, mask_resize_video_size, output_size, - mask_tiles_per_screen); - OUT.mask_tiles_per_screen = mask_tiles_per_screen; -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target -{ - // This pass: Sample (misconverged?) scanlines to the final horizontal - // resolution, apply halation (bouncing electrons), and apply the phosphor - // mask. Fake a bloom if requested. Unless we fake a bloom, the output - // will be dim from the scanline auto-dim, mask dimming, and low gamma. - - // Horizontally sample the current row (a vertically interpolated scanline) - // and account for horizontal convergence offsets, given in units of texels. 
- // float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y); - - float2 output_size = VIEWPORT_SIZE; - - const float3 scanline_color_dim = sample_rgb_scanline_horizontal( - VERTICAL_SCANLINES, VAR.scanline_tex_uv, - VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv); - const float auto_dim_factor = levels_autodim_temp; - - // Sample the phosphor mask: - const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen; - const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv( - tile_uv_wrap, VAR.mask_tile_start_uv_and_size); - float3 phosphor_mask_sample; - #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE - const bool sample_orig_luts = get_mask_sample_mode() > 0.5; - #else - static const bool sample_orig_luts = true; - #endif - if(sample_orig_luts) - { - // If mask_type is static, this branch will be resolved statically. - if(mask_type < 0.5) - { - phosphor_mask_sample = tex2D_linearize( - mask_grille_texture_large, mask_tex_uv).rgb; - } - else if(mask_type < 1.5) - { - phosphor_mask_sample = tex2D_linearize( - mask_slot_texture_large, mask_tex_uv).rgb; - } - else - { - phosphor_mask_sample = tex2D_linearize( - mask_shadow_texture_large, mask_tex_uv).rgb; - } - } - else - { - // Sample the resized mask, and avoid tiling artifacts: - phosphor_mask_sample = tex2Dtiled_mask_linearize( - MASK_RESIZE, mask_tex_uv).rgb; - } - - // Sample the halation texture (auto-dim to match the scanlines), and - // account for both horizontal and vertical convergence offsets, given - // in units of texels horizontally and same-field scanlines vertically: - const float3 halation_color = tex2D_linearize( - HALATION_BLUR, VAR.halation_tex_uv).rgb; - - // Apply halation: Halation models electrons flying around under the glass - // and hitting the wrong phosphors (of any color). It desaturates, so - // average the halation electrons to a scalar. 
Reduce the local scanline - // intensity accordingly to conserve energy. - const float3 halation_intensity_dim = - dot(halation_color, auto_dim_factor.xxx/3.0).xxx; - const float3 electron_intensity_dim = lerp(scanline_color_dim, - halation_intensity_dim, halation_weight); - - // Apply the phosphor mask: - const float3 phosphor_emission_dim = electron_intensity_dim * - phosphor_mask_sample; - - #ifdef PHOSPHOR_BLOOM_FAKE - // The BLOOM_APPROX pass approximates a blurred version of a masked - // and scanlined image. It's usually used to compute the brightpass, - // but we can also use it to fake the bloom stage entirely. Caveats: - // 1.) A fake bloom is conceptually different, since we're mixing in a - // fully blurred low-res image, and the biggest implication are: - // 2.) If mask_amplify is incorrect, results deteriorate more quickly. - // 3.) The inaccurate blurring hurts quality in high-contrast areas. - // 4.) The bloom_underestimate_levels parameter seems less sensitive. - // Reverse the auto-dimming and amplify to compensate for mask dimming: - #define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND - #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND - static const float blur_contrast = 1.05; - #else - static const float blur_contrast = 1.0; - #endif - const float mask_amplify = get_mask_amplify(); - const float undim_factor = 1.0/auto_dim_factor; - const float3 phosphor_emission = - phosphor_emission_dim * undim_factor * mask_amplify; - // Get a phosphor blur estimate, accounting for convergence offsets: - const float3 electron_intensity = electron_intensity_dim * undim_factor; - const float3 phosphor_blur_approx_soft = tex2D_linearize( - BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb; - const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft, - electron_intensity, 0.1) * blur_contrast; - // We could blend between phosphor_emission and phosphor_blur_approx, - // solving for the minimum blend_ratio that avoids clipping past 1.0: - // 1.0 >= total_intensity - // 1.0 >= 
phosphor_emission * (1.0 - blend_ratio) + - // phosphor_blur_approx * blend_ratio - // blend_ratio = (phosphor_emission - 1.0)/ - // (phosphor_emission - phosphor_blur_approx); - // However, this blurs far more than necessary, because it aims for - // full brightness, not minimal blurring. To fix it, base blend_ratio - // on a max area intensity only so it varies more smoothly: - const float3 phosphor_blur_underestimate = - phosphor_blur_approx * bloom_underestimate_levels; - const float3 area_max_underestimate = - phosphor_blur_underestimate * mask_amplify; - #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND - const float3 blend_ratio_temp = - (area_max_underestimate - 1.0.xxx) / - (area_max_underestimate - phosphor_blur_underestimate); - #else - // Try doing it like an area-based brightpass. This is nearly - // identical, but it's worth toying with the code in case I ever - // find a way to make it look more like a real bloom. (I've had - // some promising textures from combining an area-based blend ratio - // for the phosphor blur and a more brightpass-like blend-ratio for - // the phosphor emission, but I haven't found a way to make the - // brightness correct across the whole color range, especially with - // different bloom_underestimate_levels values.) 
- const float desired_triad_size = lerp(mask_triad_size_desired, - output_size.x/mask_num_triads_desired, - mask_specify_num_triads); - const float bloom_sigma = get_min_sigma_to_blur_triad( - desired_triad_size, bloom_diff_thresh); - const float center_weight = get_center_weight(bloom_sigma); - const float3 max_area_contribution_approx = - max(0.0.xxx, phosphor_blur_approx - - center_weight * phosphor_emission); - const float3 area_contrib_underestimate = - bloom_underestimate_levels * max_area_contribution_approx; - const float3 blend_ratio_temp = - ((1.0.xxx - area_contrib_underestimate) / - area_max_underestimate - 1.0.xxx) / (center_weight - 1.0); - #endif - // Clamp blend_ratio in case it's out-of-range, but be SUPER careful: - // min/max/clamp are BIZARRELY broken with lerp (optimization bug?), - // and this redundant sequence avoids bugs, at least on nVidia cards: - const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0); - const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess); - // Blend the blurred and unblurred images: - const float3 phosphor_emission_unclipped = - lerp(phosphor_emission, phosphor_blur_approx, blend_ratio); - // Simulate refractive diffusion by reusing the halation sample. - const float3 pixel_color = lerp(phosphor_emission_unclipped, - halation_color, diffusion_weight); - #else - const float3 pixel_color = phosphor_emission_dim; - #endif - // Encode if necessary, and output. 
- return encode_output(float4(pixel_color, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh deleted file mode 100644 index cad91ba0e..000000000 --- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh +++ /dev/null @@ -1,241 +0,0 @@ -///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// - -// crt-royale: A full-featured CRT shader, with cheese. -// Copyright (C) 2014 TroggleMonkey -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 2 of the License, or any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 59 Temple -// Place, Suite 330, Boston, MA 02111-1307 USA - -#undef FIRST_PASS -////////////////////////////////// INCLUDES ////////////////////////////////// - -//#include "../include/user-settings.fxh" -//#include "../include/derived-settings-and-constants.fxh" -#include "../include/bind-shader-params.fxh" -#include "../include/scanline-functions.fxh" -//#include "../include/gamma-management.fxh" - -///////////////////////////////// STRUCTURES ///////////////////////////////// - -struct out_vertex_p1 -{ - // Use explicit semantics so COLORx doesn't clamp values outside [0, 1]. 
- float2 tex_uv : TEXCOORD1; - float2 uv_step : TEXCOORD2; // uv size of a texel (x) and scanline (y) - float2 il_step_multiple : TEXCOORD3; // (1, 1) = progressive, (1, 2) = interlaced - float pixel_height_in_scanlines : TEXCOORD4; // Height of an output pixel in scanlines -}; - - -//////////////////////////////// VERTEX SHADER /////////////////////////////// - -// Vertex shader generating a triangle covering the entire screen -void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - OUT.tex_uv = texcoord; - - float2 texture_size = VERTICAL_SCANLINES_texture_size; - float2 output_size = float2(TEXTURE_SIZE.x, VIEWPORT_SIZE.y); - - // Detect interlacing: il_step_multiple indicates the step multiple between - // lines: 1 is for progressive sources, and 2 is for interlaced sources. -// const float2 video_size = 1.0/NormalizedNativePixelSize; - const float y_step = 1.0 + float(is_interlaced(video_size.y)); - OUT.il_step_multiple = float2(1.0, y_step); - // Get the uv tex coords step between one texel (x) and scanline (y): - OUT.uv_step = OUT.il_step_multiple / texture_size; - - // If shader parameters are used, {min, max}_{sigma, shape} are runtime - // values. 
Compute {sigma, shape}_range outside of scanline_contrib() so - // they aren't computed once per scanline (6 times per fragment and up to - // 18 times per vertex): -/* const float sigma_range = max(beam_max_sigma, beam_min_sigma) - - beam_min_sigma; - const float shape_range = max(beam_max_shape, beam_min_shape) - - beam_min_shape; -*/ - // We need the pixel height in scanlines for antialiased/integral sampling: - const float ph = (video_size.y / output_size.y) / - OUT.il_step_multiple.y; - OUT.pixel_height_in_scanlines = ph; - -} - - -/////////////////////////////// FRAGMENT SHADER ////////////////////////////// - -float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target -{ - // This pass: Sample multiple (misconverged?) scanlines to the final - // vertical resolution. Temporarily auto-dim the output to avoid clipping. - - // Read some attributes into local variables: - const float2 texture_size = VERTICAL_SCANLINES_texture_size; - const float2 texture_size_inv = 1.0/texture_size; - const float2 uv_step = VAR.uv_step; - const float2 il_step_multiple = VAR.il_step_multiple; - const float frame_count = FrameCount; - const float ph = VAR.pixel_height_in_scanlines; - - // Get the uv coords of the previous scanline (in this field), and the - // scanline's distance from this sample, in scanlines. - float dist; - const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size, - texture_size_inv, il_step_multiple, frame_count, dist); - - // Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next - // scanlines are numbered 2 and 3. Get scanline colors colors (ignore - // horizontal sampling, since since IN.output_size.x = video_size.x). - // NOTE: Anisotropic filtering creates interlacing artifacts, which is why - // ORIG_LINEARIZED bobbed any interlaced input before this pass. 
- const float2 v_step = float2(0.0, uv_step.y); - const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb; - const float3 scanline3_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb; - float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color, - scanline_outside_color; - float dist_round; - // Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines: - if(beam_num_scanlines > 5.5) - { - scanline1_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb; - scanline4_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb; - scanline0_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb; - scanline5_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb; - } - // Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines: - else if(beam_num_scanlines > 4.5) - { - scanline1_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb; - scanline4_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb; - // dist is in [0, 1] - dist_round = round(dist); - const float2 sample_0_or_5_uv_off = - lerp(-2.0 * v_step, 3.0 * v_step, dist_round); - // Call this "scanline_outside_color" to cope with the conditional - // scanline number: - scanline_outside_color = tex2D_linearize( - ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb; - } - // Use scanlines 1 and 4 for a total of 4 scanlines: - else if(beam_num_scanlines > 3.5) - { - scanline1_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb; - scanline4_color = - tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb; - } - // Use scanline 1 or 4 for a total of 3 scanlines: - else if(beam_num_scanlines > 2.5) - { - // dist is in [0, 1] - dist_round = round(dist); - const float2 sample_1or4_uv_off = - lerp(-v_step, 2.0 * v_step, dist_round); - scanline_outside_color = tex2D_linearize( - ORIG_LINEARIZED, scanline_uv + 
sample_1or4_uv_off).rgb; - } - - // Compute scanline contributions, accounting for vertical convergence. - // Vertical convergence offsets are in units of current-field scanlines. - // dist2 means "positive sample distance from scanline 2, in scanlines:" - float3 dist2 = dist.xxx; - if(beam_misconvergence) - { - const float3 convergence_offsets_vert_rgb = - get_convergence_offsets_y_vector(); - dist2 = dist.xxx - convergence_offsets_vert_rgb; - } - // Calculate {sigma, shape}_range outside of scanline_contrib so it's only - // done once per pixel (not 6 times) with runtime params. Don't reuse the - // vertex shader calculations, so static versions can be constant-folded. - const float sigma_range = max(beam_max_sigma, beam_min_sigma) - - beam_min_sigma; - const float shape_range = max(beam_max_shape, beam_min_shape) - - beam_min_shape; - // Calculate and sum final scanline contributions, starting with lines 2/3. - // There is no normalization step, because we're not interpolating a - // continuous signal. Instead, each scanline is an additive light source. 
- const float3 scanline2_contrib = scanline_contrib(dist2, - scanline2_color, ph, sigma_range, shape_range); - const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2), - scanline3_color, ph, sigma_range, shape_range); - float3 scanline_intensity = scanline2_contrib + scanline3_contrib; - if(beam_num_scanlines > 5.5) - { - const float3 scanline0_contrib = - scanline_contrib(dist2 + 2.0.xxx, scanline0_color, - ph, sigma_range, shape_range); - const float3 scanline1_contrib = - scanline_contrib(dist2 + 1.0.xxx, scanline1_color, - ph, sigma_range, shape_range); - const float3 scanline4_contrib = - scanline_contrib(abs(2.0.xxx - dist2), scanline4_color, - ph, sigma_range, shape_range); - const float3 scanline5_contrib = - scanline_contrib(abs(3.0.xxx - dist2), scanline5_color, - ph, sigma_range, shape_range); - scanline_intensity += scanline0_contrib + scanline1_contrib + - scanline4_contrib + scanline5_contrib; - } - else if(beam_num_scanlines > 4.5) - { - const float3 scanline1_contrib = - scanline_contrib(dist2 + 1.0.xxx, scanline1_color, - ph, sigma_range, shape_range); - const float3 scanline4_contrib = - scanline_contrib(abs(2.0.xxx - dist2), scanline4_color, - ph, sigma_range, shape_range); - const float3 dist0or5 = lerp( - dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round); - const float3 scanline0or5_contrib = scanline_contrib( - dist0or5, scanline_outside_color, ph, sigma_range, shape_range); - scanline_intensity += scanline1_contrib + scanline4_contrib + - scanline0or5_contrib; - } - else if(beam_num_scanlines > 3.5) - { - const float3 scanline1_contrib = - scanline_contrib(dist2 + 1.0.xxx, scanline1_color, - ph, sigma_range, shape_range); - const float3 scanline4_contrib = - scanline_contrib(abs(2.0.xxx - dist2), scanline4_color, - ph, sigma_range, shape_range); - scanline_intensity += scanline1_contrib + scanline4_contrib; - } - else if(beam_num_scanlines > 2.5) - { - const float3 dist1or4 = lerp( - dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round); - 
const float3 scanline1or4_contrib = scanline_contrib( - dist1or4, scanline_outside_color, ph, sigma_range, shape_range); - scanline_intensity += scanline1or4_contrib; - } - - // Auto-dim the image to avoid clipping, encode if necessary, and output. - // My original idea was to compute a minimal auto-dim factor and put it in - // the alpha channel, but it wasn't working, at least not reliably. This - // is faster anyway, levels_autodim_temp = 0.5 isn't causing banding. - return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0)); -} - diff --git a/data/resources/shaders/reshade/Shaders/denoisers/bilateral.fx b/data/resources/shaders/reshade/Shaders/denoisers/bilateral.fx deleted file mode 100644 index 6063dba0e..000000000 --- a/data/resources/shaders/reshade/Shaders/denoisers/bilateral.fx +++ /dev/null @@ -1,166 +0,0 @@ -#include "ReShade.fxh" - -/* - Bilateral - Smart - - Copyright (C) 2024 guest(r) - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -*/ - - -uniform float FRANGE < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 10.0; - ui_step = 1.0; - ui_label = "Filter Range"; -> = 5.0; - -uniform float FBSMOOTH < - ui_type = "drag"; - ui_min = 0.05; - ui_max = 1.0; - ui_step = 0.025; - ui_label = "Filter Base Smoothing"; -> = 0.3; - -uniform float FSIGMA < - ui_type = "drag"; - ui_min = 0.15; - ui_max = 1.5; - ui_step = 0.05; - ui_label = "Filter Strength"; -> = 1.0; - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -texture2D tBilateral_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;}; -sampler2D sBilateral_P0{Texture=tBilateral_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - -#define FSIGMA1 (1.0/FSIGMA) - -#define COMPAT_TEXTURE(c,d) tex2D(c,d) - -float wt(float3 A, float3 B) -{ - return clamp(FBSMOOTH - 2.33*dot(abs(A-B),1.0.xxx)/(dot(A+B,1.0.xxx)+1.0), 0.0, 0.25); -} - - -float getw(float x, float3 c, float3 p) -{ - float y = pow(max(1.0-x,0.0), FSIGMA1); - float d = wt(c,p); - return y*d; -} - - - -float4 PS_Bilateral_X(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target -{ - float4 SourceSize = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio)); -// float4 SourceSize = float4(1.0/NormalizedNativePixelSize, NormalizedNativePixelSize); - float2 pos = vTexCoord * SourceSize.xy; - float f = 0.5-frac(pos.x); - float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw; - float2 dx = float2(SourceSize.z, 0.0); - - float w, fp; - float wsum = 0.0; - float3 pixel; - float FPR = FRANGE; - float FPR1 = 1.0/FPR; - float LOOPSIZE = FPR; - float x = -FPR; - - float3 comp = COMPAT_TEXTURE(sBackBuffer, 
tex).rgb; - float3 color = 0.0.xxx; - - do - { - pixel = COMPAT_TEXTURE(sBackBuffer, tex + x*dx).rgb; - fp = min(abs(x+f),FPR)*FPR1; - w = getw(fp,comp,pixel); - color = color + w * pixel; - wsum = wsum + w; - - x = x + 1.0; - - } while (x <= LOOPSIZE); - - color = color / wsum; - - return float4(color, 1.0); -} - - -float4 PS_Bilateral_Y(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target -{ - float4 SourceSize = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio)); - float2 pos = vTexCoord * SourceSize.xy; - float f = 0.5-frac(pos.y); - float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw; - float2 dy = float2(0.0, SourceSize.w); - - float w, fp; - float wsum = 0.0; - float3 pixel; - float FPR = FRANGE; - float FPR1 = 1.0/FPR; - float LOOPSIZE = FPR; - float y = -FPR; - - float3 comp = COMPAT_TEXTURE(sBilateral_P0, tex).rgb; - float3 color = 0.0.xxx; - - do - { - pixel = COMPAT_TEXTURE(sBilateral_P0, tex + y*dy).rgb; - fp = min(abs(y+f),FPR)*FPR1; - w = getw(fp,comp,pixel); - color = color + w * pixel; - wsum = wsum + w; - - y = y + 1.0; - - } while (y <= LOOPSIZE); - - color = color / wsum; - - return float4(color, 1.0); -} - -technique Bilateral -{ - - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_Bilateral_X; - RenderTarget = tBilateral_P0; - } - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_Bilateral_Y; - } - -} diff --git a/data/resources/shaders/reshade/Shaders/interpolation/lanczos3.fx b/data/resources/shaders/reshade/Shaders/interpolation/lanczos3.fx deleted file mode 100644 index f28929827..000000000 --- a/data/resources/shaders/reshade/Shaders/interpolation/lanczos3.fx +++ /dev/null @@ -1,146 +0,0 @@ -#include "ReShade.fxh" - -/* - Lanczos3 - Multipass code by Hyllian 2022. - -*/ - - -/* - Copyright (C) 2010 Team XBMC - http://www.xbmc.org - Copyright (C) 2011 Stefanos A. 
- http://www.opentk.com - -This Program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -This Program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with XBMC; see the file COPYING. If not, write to -the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. -http://www.gnu.org/copyleft/gpl.html -*/ - -uniform float L3_PRESCALE < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 8.0; - ui_step = 1.0; - ui_label = "Prescale factor"; -> = 1.0; - - -uniform bool LANCZOS3_ANTI_RINGING < - ui_type = "radio"; - ui_label = "Lanczos3 Anti-Ringing"; -> = true; - -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float BufferWidth < source = "bufferwidth"; >; - -texture2D tLanczos3_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;}; -sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;}; - - -#define AR_STRENGTH 1.0 -#define FIX(c) (max(abs(c),1e-5)) -#define PI 3.1415926535897932384626433832795 -#define radius 3.0 - -float3 weight3(float x) -{ - float3 Sampling = FIX(2.0 * PI * float3(x - 1.5, x - 0.5, x + 0.5)); - - // Lanczos3. Note: we normalize outside this function, so no point in multiplying by radius. 
- return sin(Sampling) * sin(Sampling / radius) / (Sampling * Sampling); -} - -float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C4, float3 C5) -{ - float3 w1 = weight3(0.5 - fp * 0.5); - float3 w2 = weight3(1.0 - fp * 0.5); - - float sum = dot(w1, 1.0.xxx) + dot(w2, 1.0.xxx); - w1 /= sum; - w2 /= sum; - - float3 color = mul(w1, float3x3( C0, C2, C4 )) + mul(w2, float3x3( C1, C3, C5)); - - // Anti-ringing - if (LANCZOS3_ANTI_RINGING == true) - { - float3 aux = color; - float3 min_sample = min(min(C1, C2), min(C3, C4)); - float3 max_sample = max(max(C1, C2), max(C3, C4)); - color = clamp(color, min_sample, max_sample); - color = lerp(aux, color, AR_STRENGTH*step(0.0, (C1-C2)*(C3-C4))); - } - - return color; -} - - - -float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target -{ - // Both dimensions are unfiltered, so it looks for lores pixels. - float2 ps = NormalizedNativePixelSize/L3_PRESCALE; - float2 pos = uv_tx.xy/ps - float2(0.5, 0.0); - float2 tc = (floor(pos) + 0.5.xx) * ps; - float2 fp = frac(pos); - - float3 C0 = tex2D(ReShade::BackBuffer, tc + ps*float2(-2.0, 0.0)).rgb; - float3 C1 = tex2D(ReShade::BackBuffer, tc + ps*float2(-1.0, 0.0)).rgb; - float3 C2 = tex2D(ReShade::BackBuffer, tc + ps*float2( 0.0, 0.0)).rgb; - float3 C3 = tex2D(ReShade::BackBuffer, tc + ps*float2( 1.0, 0.0)).rgb; - float3 C4 = tex2D(ReShade::BackBuffer, tc + ps*float2( 2.0, 0.0)).rgb; - float3 C5 = tex2D(ReShade::BackBuffer, tc + ps*float2( 3.0, 0.0)).rgb; - - float3 color = lanczos3ar(fp.x, C0, C1, C2, C3, C4, C5); - - return float4(color, 1.0); -} - - -float4 PS_Lanczos3_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target -{ - // One must be careful here. Horizontal dimension is already filtered, so it looks for x in hires. 
- float2 ps = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/L3_PRESCALE); - float2 pos = uv_tx.xy/ps - float2(0.0, 0.5); - float2 tc = (floor(pos) + 0.5.xx) * ps; - float2 fp = frac(pos); - - float3 C0 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -2.0)).rgb; - float3 C1 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, -1.0)).rgb; - float3 C2 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 0.0)).rgb; - float3 C3 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 1.0)).rgb; - float3 C4 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 2.0)).rgb; - float3 C5 = tex2D(sLanczos3_P0, tc + ps*float2(0.0, 3.0)).rgb; - - float3 color = lanczos3ar(fp.y, C0, C1, C2, C3, C4, C5); - - return float4(color, 1.0); -} - - -technique Lanczos3 -{ - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_Lanczos3_X; - RenderTarget = tLanczos3_P0; - } - pass - { - VertexShader = PostProcessVS; - PixelShader = PS_Lanczos3_Y; - } -} diff --git a/data/resources/shaders/reshade/Shaders/misc/deblur-luma.fx b/data/resources/shaders/reshade/Shaders/misc/deblur-luma.fx deleted file mode 100644 index 7d095542a..000000000 --- a/data/resources/shaders/reshade/Shaders/misc/deblur-luma.fx +++ /dev/null @@ -1,151 +0,0 @@ -#include "ReShade.fxh" - -/* - Deblur-Luma Shader - - Copyright (C) 2005 - 2024 guest(r) - guest.r@gmail.com - - Luma adaptation by Hyllian - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -*/ - -uniform float OFFSET < - ui_type = "drag"; - ui_min = 0.25; - ui_max = 4.0; - ui_step = 0.25; - ui_label = "Deblur offset"; -> = 2.0; - -uniform float DEBLUR < - ui_type = "drag"; - ui_min = 1.0; - ui_max = 7.0; - ui_step = 0.25; - ui_label = "Deblur str."; -> = 1.75; - -uniform float SMART < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Smart deblur"; -> = 1.0; - -uniform float2 ViewportSize < source = "viewportsize"; >; - - -static const float3 luma = float3(0.299,0.587,0.114); -static const float4 res = float4(0.0001, 0.0001, 0.0001, 0.0001); -static const float4 uno = float4(1.,1.,1.,1.); - - -float min8(float4 a4, float4 b4) -{ - float4 ab4 = min(a4, b4); float2 ab2 = min(ab4.xy, ab4.zw); return min(ab2.x, ab2.y); -} - -float max8(float4 a4, float4 b4) -{ - float4 ab4 = max(a4, b4); float2 ab2 = max(ab4.xy, ab4.zw); return max(ab2.x, ab2.y); -} - - -struct ST_VertexOut -{ - float4 t1 : TEXCOORD1; - float4 t2 : TEXCOORD2; - float4 t3 : TEXCOORD3; -}; - - -// Vertex shader generating a triangle covering the entire screen -void VS_Deblur_Luma(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 
2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float dx = OFFSET/ViewportSize.x; - float dy = OFFSET/ViewportSize.y; - - vVARS.t1 = texcoord.xxxy + float4( -dx, 0.0, dx, -dy); // c00 c10 c20 - vVARS.t2 = texcoord.xxxy + float4( -dx, 0.0, dx, 0.0); // c01 c11 c21 - vVARS.t3 = texcoord.xxxy + float4( -dx, 0.0, dx, dy); // c02 c12 c22 -} - - -float4 PS_Deblur_Luma(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target -{ - - float3 c11 = tex2D(ReShade::BackBuffer, vVARS.t2.yw).xyz; - float3 c00 = tex2D(ReShade::BackBuffer, vVARS.t1.xw).xyz; - float3 c20 = tex2D(ReShade::BackBuffer, vVARS.t1.zw).xyz; - float3 c22 = tex2D(ReShade::BackBuffer, vVARS.t3.zw).xyz; - float3 c02 = tex2D(ReShade::BackBuffer, vVARS.t3.xw).xyz; - float3 c10 = tex2D(ReShade::BackBuffer, vVARS.t1.yw).xyz; - float3 c21 = tex2D(ReShade::BackBuffer, vVARS.t2.zw).xyz; - float3 c12 = tex2D(ReShade::BackBuffer, vVARS.t3.yw).xyz; - float3 c01 = tex2D(ReShade::BackBuffer, vVARS.t2.xw).xyz; - - float4x3 chv = float4x3(c10, c01, c21, c12); - float4x3 cdi = float4x3(c00, c02, c20, c22); - - float4 CHV = mul(chv, luma); - float4 CDI = mul(cdi, luma); - float C11 = dot(c11, luma); - - float mn1 = min8(CHV, CDI); - float mx1 = max8(CHV, CDI); - - float2 mnmx = float2(min(C11, mn1), max(C11, mx1)); - - float2 dif = abs(float2(C11, C11) - mnmx) + res.xy; - - dif = pow(dif, float2(DEBLUR, DEBLUR)); - - float D11 = dot(dif, mnmx.yx)/(dif.x + dif.y); - - float k11 = 1.0/(abs(C11 - D11) + res.x); - - float4 khv = float4(1.0/(abs(CHV-float4(D11, D11, D11, D11)) + res)); - float4 kdi = float4(1.0/(abs(CDI-float4(D11, D11, D11, D11)) + res)); - - float avg = (dot(khv + kdi, uno) + k11)/10.0; - - khv = max(khv-float4(avg, avg, avg, avg), float4(0.0, 0.0, 0.0, 0.0)); - kdi = max(kdi-float4(avg, avg, avg, avg), float4(0.0, 0.0, 0.0, 0.0)); - k11 = max(k11-avg, 0.0); - - float3 d11 = (mul(khv, chv) + mul(kdi, cdi) + (k11 + res.x)*c11) / 
(dot(khv + kdi, uno) + k11 + res.x); - - float contrast = mnmx.y - mnmx.x; - c11 = lerp(c11, d11, clamp(1.75*contrast-0.125, 0.0, 1.0)); - c11 = lerp(d11, c11, SMART); - - return float4(c11, 1.0); -} - - -technique Deblur_Luma -{ - pass - { - VertexShader = VS_Deblur_Luma; - PixelShader = PS_Deblur_Luma; - } -} diff --git a/data/resources/shaders/reshade/Shaders/misc/geom.fx b/data/resources/shaders/reshade/Shaders/misc/geom.fx deleted file mode 100644 index 0b610ca0a..000000000 --- a/data/resources/shaders/reshade/Shaders/misc/geom.fx +++ /dev/null @@ -1,325 +0,0 @@ -#include "ReShade.fxh" - -/* - Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated - into any other shaders and provide curvature/warping/oversampling features. - - Adapted by Hyllian (2024). -*/ - - -/* - CRT-interlaced - - Copyright (C) 2010-2012 cgwg, Themaister and DOLLS - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - (cgwg gave their consent to have the original version of this shader - distributed under the GPL in this message: - - http://board.byuu.org/viewtopic.php?p=26075#p26075 - - "Feel free to distribute my shaders under the GPL. After all, the - barrel distortion code was taken from the Curvature shader, which is - under the GPL." 
- ) - This shader variant is pre-configured with screen curvature -*/ - - - -uniform bool geom_curvature < - ui_type = "radio"; - ui_label = "Geom Curvature Toggle"; -> = 1.0; - -uniform float geom_R < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 10.0; - ui_step = 0.1; - ui_label = "Geom Curvature Radius"; -> = 2.0; - -uniform float geom_d < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 3.0; - ui_step = 0.1; - ui_label = "Geom Distance"; -> = 1.5; - -uniform bool geom_invert_aspect < - ui_type = "radio"; - ui_label = "Geom Curvature Aspect Inversion"; -> = 0.0; - -uniform float geom_cornersize < - ui_type = "drag"; - ui_min = 0.001; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Geom Corner Size"; -> = 0.03; - -uniform float geom_cornersmooth < - ui_type = "drag"; - ui_min = 80.0; - ui_max = 2000.0; - ui_step = 100.0; - ui_label = "Geom Corner Smoothness"; -> = 1000.0; - -uniform float geom_x_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Geom Horizontal Tilt"; -> = 0.0; - -uniform float geom_y_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Geom Vertical Tilt"; -> = 0.0; - -uniform float geom_overscan_x < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "Geom Horiz. Overscan %"; -> = 100.0; - -uniform float geom_overscan_y < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "Geom Vert. 
Overscan %"; -> = 100.0; - -uniform float centerx < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center X"; -> = 0.00; - -uniform float centery < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center Y"; -> = 0.00; - -uniform float geom_lum < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "Geom Luminance"; -> = 1.0; - -uniform float geom_target_gamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "Geom Target Gamma"; -> = 2.4; - -uniform float geom_monitor_gamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "Geom Monitor Gamma"; -> = 2.2; - - -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float ViewportHeight < source = "viewportheight"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; - -// Comment the next line to disable interpolation in linear gamma (and -// gain speed). -#define LINEAR_PROCESSING - -// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature -#define OVERSAMPLE - -// Use the older, purely gaussian beam profile; uncomment for speed -//#define USEGAUSSIAN - -// Macros. 
-#define FIX(c) max(abs(c), 1e-5); -#define PI 3.141592653589 - -#ifdef LINEAR_PROCESSING -# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), geom_target_gamma.xxxx) -#else -# define TEX2D(c) tex2D(sBackBuffer, (c)) -#endif - -// aspect ratio -#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth)) -#define overscan (1.01.xx); - - -struct ST_VertexOut -{ - float2 sinangle : TEXCOORD1; - float2 cosangle : TEXCOORD2; - float3 stretch : TEXCOORD3; - float2 TextureSize : TEXCOORD4; -}; - - -float intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + geom_d*geom_d; - float B, C; - - B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d); - C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y; - - return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A); -} - -float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = intersect(xy, sinangle, cosangle); - float2 point = (c.xx*xy + geom_R.xx*sinangle) / geom_R.xx; - float2 poc = point/cosangle; - float2 tang = sinangle/cosangle; - - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A); - float2 uv = (point - a*sinangle) / cosangle; - float r = FIX(geom_R*acos(a)); - - return uv*r/sin(r/geom_R); -} - -float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r = FIX(sqrt(dot(uv, uv))); - uv *= sin(r/geom_R)/r; - float x = 1.0 - cos(r/geom_R); - float D; - - D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle); - - return geom_d*(uv*cosangle - x*sinangle)/D; -} - -float3 maxscale(float2 sinangle, float2 cosangle) -{ - float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = 0.5.xx*aspect; - - float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect; 
- float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y)); -} - -float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch) -{ - coord = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy; - - return (bkwtrans(coord, sinangle, cosangle) / - float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + 0.5.xx); -} - - -// Vertex shader generating a triangle covering the entire screen -void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - // Screen centering - texcoord = texcoord - float2(centerx,centery)/100.0; - - float2 SourceSize = 1.0/NormalizedNativePixelSize; - - // Precalculate a bunch of useful values we'll need in the fragment - // shader. - vVARS.sinangle = sin(float2(geom_x_tilt, geom_y_tilt)); - vVARS.cosangle = cos(float2(geom_x_tilt, geom_y_tilt)); - vVARS.stretch = maxscale(vVARS.sinangle, vVARS.cosangle); - vVARS.TextureSize = float2(SourceSize.x, SourceSize.y); -} - - -float corner(float2 coord) -{ - coord = min(coord, 1.0.xx - coord) * aspect; - float2 cdist = geom_cornersize.xx; - coord = (cdist - min(coord, cdist)); - float dist = sqrt(dot(coord, coord)); - - return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0); -} - -float fwidth(float value) -{ - return abs(ddx(value)) + abs(ddy(value)); -} - - -float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target -{ - // Texture coordinates of the texel containing the active pixel. - float2 xy = (geom_curvature == true) ? 
transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch) : vTexCoord; - - float cval = corner((xy-0.5.xx) * BufferToViewportRatio + 0.5.xx); - - float2 uv_ratio = frac((xy * vVARS.TextureSize - 0.5.xx) / vVARS.TextureSize); - - float4 col = TEX2D(xy); - -#ifndef LINEAR_PROCESSING - col = pow(col, geom_target_gamma.xxxx); -#endif - - col.rgb *= (geom_lum * step(0.0, uv_ratio.y)); - - float3 mul_res = col.rgb * cval.xxx; - - // Convert the image gamma for display on our output device. - mul_res = pow(mul_res, 1.0 / geom_monitor_gamma.xxx); - - return float4(mul_res, 1.0); -} - - -technique CRT_Geom -{ - pass - { - VertexShader = VS_CRT_Geom; - PixelShader = PS_CRT_Geom; - } -} diff --git a/data/resources/shaders/reshade/Shaders/misc/include/geom.fxh b/data/resources/shaders/reshade/Shaders/misc/include/geom.fxh deleted file mode 100644 index d373f9d38..000000000 --- a/data/resources/shaders/reshade/Shaders/misc/include/geom.fxh +++ /dev/null @@ -1,224 +0,0 @@ -#ifndef GEOM_PARAMS_H -#define GEOM_PARAMS_H - -/* - Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated - into any other shaders and provide curvature/warping/oversampling features. - - Adapted by Hyllian (2024). -*/ - - -/* - CRT-interlaced - - Copyright (C) 2010-2012 cgwg, Themaister and DOLLS - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - (cgwg gave their consent to have the original version of this shader - distributed under the GPL in this message: - - http://board.byuu.org/viewtopic.php?p=26075#p26075 - - "Feel free to distribute my shaders under the GPL. After all, the - barrel distortion code was taken from the Curvature shader, which is - under the GPL." 
- ) - This shader variant is pre-configured with screen curvature -*/ - - -uniform bool geom_curvature < - ui_type = "radio"; - ui_category = "Geom Curvature"; - ui_label = "Geom Curvature Toggle"; -> = 0.0; - -uniform float geom_R < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 10.0; - ui_step = 0.1; - ui_category = "Geom Curvature"; - ui_label = "Geom Curvature Radius"; -> = 2.0; - -uniform float geom_d < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 3.0; - ui_step = 0.1; - ui_category = "Geom Curvature"; - ui_label = "Geom Distance"; -> = 1.5; - -uniform bool geom_invert_aspect < - ui_type = "radio"; - ui_category = "Geom Curvature"; - ui_label = "Geom Curvature Aspect Inversion"; -> = 0.0; - -uniform float geom_cornersize < - ui_type = "drag"; - ui_min = 0.001; - ui_max = 1.0; - ui_step = 0.005; - ui_category = "Geom Curvature"; - ui_label = "Geom Corner Size"; -> = 0.03; - -uniform float geom_cornersmooth < - ui_type = "drag"; - ui_min = 80.0; - ui_max = 2000.0; - ui_step = 100.0; - ui_category = "Geom Curvature"; - ui_label = "Geom Corner Smoothness"; -> = 1000.0; - -uniform float geom_x_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_category = "Geom Curvature"; - ui_label = "Geom Horizontal Tilt"; -> = 0.0; - -uniform float geom_y_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_category = "Geom Curvature"; - ui_label = "Geom Vertical Tilt"; -> = 0.0; - -uniform float geom_overscan_x < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_category = "Geom Curvature"; - ui_label = "Geom Horiz. Overscan %"; -> = 100.0; - -uniform float geom_overscan_y < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_category = "Geom Curvature"; - ui_label = "Geom Vert. 
Overscan %"; -> = 100.0; - -uniform float centerx < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_category = "Geom Curvature"; - ui_label = "Image Center X"; -> = 0.00; - -uniform float centery < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_category = "Geom Curvature"; - ui_label = "Image Center Y"; -> = 0.00; - - - -// Macros. -#define FIX(c) max(abs(c), 1e-5); - -// aspect ratio -#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth)) - - -float intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + geom_d*geom_d; - float B, C; - - B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d); - C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y; - - return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A); -} - -float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = intersect(xy, sinangle, cosangle); - float2 point = (c.xx*xy + geom_R.xx*sinangle) / geom_R.xx; - float2 poc = point/cosangle; - float2 tang = sinangle/cosangle; - - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A); - float2 uv = (point - a*sinangle) / cosangle; - float r = FIX(geom_R*acos(a)); - - return uv*r/sin(r/geom_R); -} - -float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r = FIX(sqrt(dot(uv, uv))); - uv *= sin(r/geom_R)/r; - float x = 1.0 - cos(r/geom_R); - float D; - - D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle); - - return geom_d*(uv*cosangle - x*sinangle)/D; -} - -float3 maxscale(float2 sinangle, float2 cosangle) -{ - float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = 0.5.xx*aspect; - - float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, -a.y), 
sinangle, cosangle).y)/aspect; - float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y)); -} - -float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch) -{ - coord = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy; - - return (bkwtrans(coord, sinangle, cosangle) / - float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + 0.5.xx); -} - - -float corner(float2 coord) -{ - coord = min(coord, 1.0.xx - coord) * aspect; - float2 cdist = geom_cornersize.xx; - coord = (cdist - min(coord, cdist)); - float dist = sqrt(dot(coord, coord)); - - return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0); -} - -float fwidth(float value) -{ - return abs(ddx(value)) + abs(ddy(value)); -} - -#endif // GEOM_PARAMS_H diff --git a/data/resources/shaders/reshade/Shaders/misc/include/mask.fxh b/data/resources/shaders/reshade/Shaders/misc/include/mask.fxh deleted file mode 100644 index 2fa70f661..000000000 --- a/data/resources/shaders/reshade/Shaders/misc/include/mask.fxh +++ /dev/null @@ -1,242 +0,0 @@ -#ifndef MASK_PARAMS_H -#define MASK_PARAMS_H - -uniform float MASK_DARK_STRENGTH < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 1.0; - ui_step = 0.01; - ui_category = "CRT Mask"; - ui_label = "MASK DARK SUBPIXEL STRENGTH"; -> = 0.5; - -uniform float MASK_LIGHT_STRENGTH < - ui_type = "drag"; - ui_min = 0.0; - ui_max = 6.0; - ui_step = 0.01; - ui_category = "CRT Mask"; - ui_label = "MASK LIGHT SUBPIXEL STRENGTH"; -> = 0.5; - -/* Mask code pasted from subpixel_masks.h. Masks 3 and 4 added. */ -float3 mask_weights(float2 coord, int phosphor_layout, float monitor_subpixels, float mask_light_str, float mask_dark_str){ - float3 weights = float3(1.,1.,1.); - float on = 1.+mask_light_str; -// float on = 1.; - float off = 1.-mask_dark_str; - float3 red = monitor_subpixels==1.0 ? 
float3(on, off, off) : float3(off, off, on ); - float3 green = float3(off, on, off); - float3 blue = monitor_subpixels==1.0 ? float3(off, off, on ) : float3(on, off, off); - float3 magenta = float3(on, off, on ); - float3 yellow = monitor_subpixels==1.0 ? float3(on, on, off) : float3(off, on, on ); - float3 cyan = monitor_subpixels==1.0 ? float3(off, on, on ) : float3(on, on, off); - float3 black = float3(off, off, off); - float3 white = float3(on, on, on ); - int w, z = 0; - - // This pattern is used by a few layouts, so we'll define it here - float3 aperture_weights = lerp(magenta, green, floor(coord.x % 2.0)); - - if(phosphor_layout == 0) return weights; - - else if(phosphor_layout == 1){ - // classic aperture for RGB panels; good for 1080p, too small for 4K+ - // aka aperture_1_2_bgr - weights = aperture_weights; - return weights; - } - - else if(phosphor_layout == 2){ - // Classic RGB layout; good for 1080p and lower - float3 bw3[3] = {red, green, blue}; -// float3 bw3[3] = float3[](black, yellow, blue); - - z = int(floor(coord.x % 3.0)); - - weights = bw3[z]; - return weights; - } - - else if(phosphor_layout == 3){ - // black and white aperture; good for weird subpixel layouts and low brightness; good for 1080p and lower - float3 bw3[3] = {black, white, black}; - - z = int(floor(coord.x % 3.0)); - - weights = bw3[z]; - return weights; - } - - else if(phosphor_layout == 4){ - // reduced TVL aperture for RGB panels. Good for 4k. 
- // aperture_2_4_rgb - - float3 big_ap_rgb[4] = {red, yellow, cyan, blue}; - - w = int(floor(coord.x % 4.0)); - - weights = big_ap_rgb[w]; - return weights; - } - - else if(phosphor_layout == 5){ - // black and white aperture; good for weird subpixel layouts and low brightness; good for 4k - float3 bw4[4] = {black, black, white, white}; - - z = int(floor(coord.x % 4.0)); - - weights = bw4[z]; - return weights; - } - - else if(phosphor_layout == 6){ - // aperture_1_4_rgb; good for simulating lower - float3 ap4[4] = {red, green, blue, black}; - - z = int(floor(coord.x % 4.0)); - - weights = ap4[z]; - return weights; - } - - else if(phosphor_layout == 7){ - // 2x2 shadow mask for RGB panels; good for 1080p, too small for 4K+ - // aka delta_1_2x1_bgr - float3 inverse_aperture = lerp(green, magenta, floor(coord.x % 2.0)); - weights = lerp(aperture_weights, inverse_aperture, floor(coord.y % 2.0)); - return weights; - } - - else if(phosphor_layout == 8){ - // delta_2_4x1_rgb - float3 delta[8] = { - red, yellow, cyan, blue, - cyan, blue, red, yellow - }; - - w = int(floor(coord.y % 2.0)); - z = int(floor(coord.x % 4.0)); - - weights = delta[4*w+z]; - return weights; - } - - else if(phosphor_layout == 9){ - // delta_1_4x1_rgb; dunno why this is called 4x1 when it's obviously 4x2 /shrug - float3 delta1[8] = { - red, green, blue, black, - blue, black, red, green - }; - - w = int(floor(coord.y % 2.0)); - z = int(floor(coord.x % 4.0)); - - weights = delta1[4*w+z]; - return weights; - } - - else if(phosphor_layout == 10){ - // delta_2_4x2_rgb - float3 delta[16] = { - red, yellow, cyan, blue, - red, yellow, cyan, blue, - cyan, blue, red, yellow, - cyan, blue, red, yellow - }; - - w = int(floor(coord.y % 4.0)); - z = int(floor(coord.x % 4.0)); - - weights = delta[4*w+z]; - return weights; - } - - else if(phosphor_layout == 11){ - // slot mask for RGB panels; looks okay at 1080p, looks better at 4K - float3 slotmask[24] = { - red, green, blue, red, green, blue, - red, green, blue, 
black, black, black, - red, green, blue, red, green, blue, - black, black, black, red, green, blue, - }; - - w = int(floor(coord.y % 4.0)); - z = int(floor(coord.x % 6.0)); - - // use the indexes to find which color to apply to the current pixel - weights = slotmask[6*w+z]; - return weights; - } - - else if(phosphor_layout == 12){ - // slot mask for RGB panels; looks okay at 1080p, looks better at 4K - float3 slotmask[24] = { - black, white, black, black, white, black, - black, white, black, black, black, black, - black, white, black, black, white, black, - black, black, black, black, white, black - }; - - w = int(floor(coord.y % 4.0)); - z = int(floor(coord.x % 6.0)); - - // use the indexes to find which color to apply to the current pixel - weights = slotmask[6*w+z]; - return weights; - } - - else if(phosphor_layout == 13){ - // based on MajorPainInTheCactus' HDR slot mask - float3 slot[32] = { - red, green, blue, black, red, green, blue, black, - red, green, blue, black, black, black, black, black, - red, green, blue, black, red, green, blue, black, - black, black, black, black, red, green, blue, black - }; - - w = int(floor(coord.y % 4.0)); - z = int(floor(coord.x % 8.0)); - - weights = slot[8*w+z]; - return weights; - } - - else if(phosphor_layout == 14){ - // same as above but for RGB panels - float3 slot2[40] = { - red, yellow, green, blue, blue, red, yellow, green, blue, blue , - black, green, green, blue, blue, red, red, black, black, black, - red, yellow, green, blue, blue, red, yellow, green, blue, blue , - red, red, black, black, black, black, green, green, blue, blue - }; - - w = int(floor(coord.y % 4.0)); - z = int(floor(coord.x % 10.0)); - - weights = slot2[10*w+z]; - return weights; - } - - else if(phosphor_layout == 15){ - // slot_3_7x6_rgb - float3 slot[84] = { - red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue, - red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue, - red, red, 
yellow, green, cyan, blue, blue, black, black, black, black, black, black, black, - red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue, - red, red, yellow, green, cyan, blue, blue, red, red, yellow, green, cyan, blue, blue, - black, black, black, black, black, black, black, black, red, red, yellow, green, cyan, blue - }; - - w = int(floor(coord.y % 6.0)); - z = int(floor(coord.x % 14.0)); - - weights = slot[14*w+z]; - return weights; - } - - else return weights; -} - -#endif // MASK_PARAMS_H diff --git a/data/resources/shaders/reshade/Shaders/overlay/geom-overlay.fx b/data/resources/shaders/reshade/Shaders/overlay/geom-overlay.fx deleted file mode 100644 index b41888a77..000000000 --- a/data/resources/shaders/reshade/Shaders/overlay/geom-overlay.fx +++ /dev/null @@ -1,415 +0,0 @@ -#include "ReShade.fxh" - -/* - Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated - into any other shaders and provide curvature/warping/oversampling features. - - Adapted by Hyllian (2024). -*/ - - -/* - CRT-interlaced - - Copyright (C) 2010-2012 cgwg, Themaister and DOLLS - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - (cgwg gave their consent to have the original version of this shader - distributed under the GPL in this message: - - http://board.byuu.org/viewtopic.php?p=26075#p26075 - - "Feel free to distribute my shaders under the GPL. After all, the - barrel distortion code was taken from the Curvature shader, which is - under the GPL." 
- ) - This shader variant is pre-configured with screen curvature -*/ - - - -uniform bool geom_curvature < - ui_type = "radio"; - ui_label = "Geom Curvature Toggle"; - ui_category = "Curvature"; - ui_tooltip = "This shader only works with Aspect Ratio: Stretch to Fill."; -> = true; - -uniform float geom_R < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 10.0; - ui_step = 0.1; - ui_label = "Geom Curvature Radius"; -> = 10.0; - -uniform float geom_d < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 10.0; - ui_step = 0.1; - ui_label = "Geom Distance"; -> = 10.0; - -uniform bool geom_invert_aspect < - ui_type = "radio"; - ui_label = "Geom Curvature Aspect Inversion"; -> = 0.0; - -uniform float geom_cornersize < - ui_type = "drag"; - ui_min = 0.001; - ui_max = 1.0; - ui_step = 0.005; - ui_label = "Geom Corner Size"; -> = 0.006; - -uniform float geom_cornersmooth < - ui_type = "drag"; - ui_min = 80.0; - ui_max = 2000.0; - ui_step = 100.0; - ui_label = "Geom Corner Smoothness"; -> = 200.0; - -uniform float geom_x_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Geom Horizontal Tilt"; -> = 0.0; - -uniform float geom_y_tilt < - ui_type = "drag"; - ui_min = -1.0; - ui_max = 1.0; - ui_step = 0.05; - ui_label = "Geom Vertical Tilt"; -> = 0.0; - -uniform float geom_overscan_x < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "Geom Horiz. Overscan %"; -> = 48.5; - -uniform float geom_overscan_y < - ui_type = "drag"; - ui_min = -125.0; - ui_max = 125.0; - ui_step = 0.5; - ui_label = "Geom Vert. 
Overscan %"; -> = 64.5; - -uniform float centerx < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center X"; -> = 0.0; - -uniform float centery < - ui_type = "drag"; - ui_min = -100.0; - ui_max = 100.0; - ui_step = 0.1; - ui_label = "Image Center Y"; -> = -8.8; - -uniform float geom_lum < - ui_type = "drag"; - ui_min = 0.5; - ui_max = 2.0; - ui_step = 0.01; - ui_label = "Geom Luminance"; -> = 1.0; - -uniform float geom_target_gamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "Geom Target Gamma"; -> = 2.4; - -uniform float geom_monitor_gamma < - ui_type = "drag"; - ui_min = 0.1; - ui_max = 5.0; - ui_step = 0.1; - ui_label = "Geom Monitor Gamma"; -> = 2.2; - - -uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >; -uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; -uniform float2 ViewportSize < source = "viewportsize"; >; -uniform float ViewportX < source = "viewportx"; >; -uniform float ViewportY < source = "viewporty"; >; -uniform float ViewportWidth < source = "viewportwidth"; >; -uniform float ViewportHeight < source = "viewportheight"; >; -uniform float2 ViewportOffset < source = "viewportoffset"; >; - -sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;}; - -texture tOverlay < source = "overlay/psx.jpg"; > -{ - Width = BUFFER_WIDTH; - Height = BUFFER_HEIGHT; - MipLevels = 1; -}; - -sampler sOverlay { Texture = tOverlay; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;}; - -// Comment the next line to disable interpolation in linear gamma (and -// gain speed). 
-#define LINEAR_PROCESSING - -// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature -#define OVERSAMPLE - -// Use the older, purely gaussian beam profile; uncomment for speed -//#define USEGAUSSIAN - -// Macros. -#define FIX(c) max(abs(c), 1e-5); -#define PI 3.141592653589 - -#ifdef LINEAR_PROCESSING -# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma)) -#else -# define TEX2D(c) tex2D(sBackBuffer, (c)) -#endif - -// aspect ratio -#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth)) -#define overscan (float2(1.01,1.01)); - - -struct ST_VertexOut -{ - float2 sinangle : TEXCOORD1; - float2 cosangle : TEXCOORD2; - float3 stretch : TEXCOORD3; - float2 TextureSize : TEXCOORD4; -}; - - -float vs_intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + geom_d*geom_d; - float B = 2.0*(geom_R*(dot(xy,sinangle)-geom_d*cosangle.x*cosangle.y)-geom_d*geom_d); - float C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y; - - return (-B-sqrt(B*B-4.0*A*C))/(2.0*A); -} - -float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = vs_intersect(xy, sinangle, cosangle); - float2 point = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R); - float2 poc = point/cosangle; - - float2 tang = sinangle/cosangle; - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C))/(2.0*A); - float2 uv = (point - a*sinangle)/cosangle; - float r = FIX(geom_R*acos(a)); - - return uv*r/sin(r/geom_R); -} - -float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r = FIX(sqrt(dot(uv,uv))); - uv *= sin(r/geom_R)/r; - float x = 1.0-cos(r/geom_R); - float D = geom_d/geom_R + x*cosangle.x*cosangle.y+dot(uv,sinangle); - - return 
geom_d*(uv*cosangle-x*sinangle)/D; -} - -float3 vs_maxscale(float2 sinangle, float2 cosangle) -{ - float2 c = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = float2(0.5,0.5)*aspect; - - float2 lo = float2(vs_fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - vs_fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect; - - float2 hi = float2(vs_fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - vs_fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y)); -} - - - - -// Vertex shader generating a triangle covering the entire screen -void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS) -{ - texcoord.x = (id == 2) ? 2.0 : 0.0; - texcoord.y = (id == 1) ? 2.0 : 0.0; - position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - - float2 SourceSize = 1.0/NormalizedNativePixelSize; - - // Precalculate a bunch of useful values we'll need in the fragment - // shader. 
- vVARS.sinangle = sin(float2(geom_x_tilt, geom_y_tilt)); - vVARS.cosangle = cos(float2(geom_x_tilt, geom_y_tilt)); - vVARS.stretch = vs_maxscale(vVARS.sinangle, vVARS.cosangle); - vVARS.TextureSize = float2(SourceSize.x, SourceSize.y); -} - - - -float intersect(float2 xy, float2 sinangle, float2 cosangle) -{ - float A = dot(xy,xy) + geom_d*geom_d; - float B, C; - - B = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - geom_d*geom_d); - C = geom_d*geom_d + 2.0*geom_R*geom_d*cosangle.x*cosangle.y; - - return (-B-sqrt(B*B - 4.0*A*C))/(2.0*A); -} - -float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle) -{ - float c = intersect(xy, sinangle, cosangle); - float2 point = (float2(c, c)*xy - float2(-geom_R, -geom_R)*sinangle) / float2(geom_R, geom_R); - float2 poc = point/cosangle; - float2 tang = sinangle/cosangle; - - float A = dot(tang, tang) + 1.0; - float B = -2.0*dot(poc, tang); - float C = dot(poc, poc) - 1.0; - - float a = (-B + sqrt(B*B - 4.0*A*C)) / (2.0*A); - float2 uv = (point - a*sinangle) / cosangle; - float r = FIX(geom_R*acos(a)); - - return uv*r/sin(r/geom_R); -} - -float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle) -{ - float r = FIX(sqrt(dot(uv, uv))); - uv *= sin(r/geom_R)/r; - float x = 1.0 - cos(r/geom_R); - float D; - - D = geom_d/geom_R + x*cosangle.x*cosangle.y + dot(uv,sinangle); - - return geom_d*(uv*cosangle - x*sinangle)/D; -} - -float3 maxscale(float2 sinangle, float2 cosangle) -{ - float2 c = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle); - float2 a = float2(0.5, 0.5)*aspect; - - float2 lo = float2(fwtrans(float2(-a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, -a.y), sinangle, cosangle).y)/aspect; - float2 hi = float2(fwtrans(float2(+a.x, c.y), sinangle, cosangle).x, - fwtrans(float2( c.x, +a.y), sinangle, cosangle).y)/aspect; - - return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y)); -} - -float2 transform(float2 coord, float2 sinangle, float2 
cosangle, float3 stretch) -{ - coord = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy; - - return (bkwtrans(coord, sinangle, cosangle) / - float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0)/aspect + float2(0.5, 0.5)); -} - -float corner(float2 coord) -{ - coord = min(coord, float2(1.0, 1.0) - coord) * aspect; - float2 cdist = float2(geom_cornersize, geom_cornersize); - coord = (cdist - min(coord, cdist)); - float dist = sqrt(dot(coord, coord)); - - return clamp((cdist.x - dist)*geom_cornersmooth, 0.0, 1.0); -} - -float fwidth(float value){ - return abs(ddx(value)) + abs(ddy(value)); -} - - -// Code snippet borrowed from crt-cyclon. (credits to DariusG) -float2 Warp(float2 pos) -{ - pos = pos*2.0 - 1.0; - pos *= float2(1.0 + pos.y*pos.y*0, 1.0 + pos.x*pos.x*0); - pos = pos*0.5 + 0.5; - - return pos; -} - -float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target -{ - // Texture coordinates of the texel containing the active pixel. - float2 xy; - - if (geom_curvature == true) - xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch); - else - xy = vTexCoord; - - // center screen - xy = Warp(xy - float2(centerx,centery)/100.0); - - float cval = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5)); - - float2 uv_ratio = frac((xy * vVARS.TextureSize - float2(0.5, 0.5)) / vVARS.TextureSize); - - float4 col = TEX2D(xy); - -#ifndef LINEAR_PROCESSING - col = pow(col , float4(geom_target_gamma, geom_target_gamma, geom_target_gamma, geom_target_gamma)); -#endif - - col.rgb *= (geom_lum * step(0.0, uv_ratio.y)); - - float3 mul_res = col.rgb * float3(cval, cval, cval); - - // Convert the image gamma for display on our output device. 
- mul_res = pow(mul_res, float3(1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma, 1.0 / geom_monitor_gamma)); - - float4 overlay = tex2D(sOverlay, vTexCoord); - - float2 top_left = (float2(ViewportX, ViewportY) - ViewportOffset)/ViewportSize; - float2 bottom_right = (float2(ViewportX + ViewportWidth, ViewportY + ViewportHeight) - ViewportOffset)/ViewportSize; - - if (xy.x < top_left.x || xy.x > bottom_right.x || xy.y < top_left.y || xy.y > bottom_right.y) - mul_res = overlay.rgb; - - return float4(mul_res, 1.0); -} - - -technique CRT_Geom -{ - pass - { - VertexShader = VS_CRT_Geom; - PixelShader = PS_CRT_Geom; - } -} diff --git a/data/resources/shaders/reshade/Textures/CRT-LUT-1.png b/data/resources/shaders/reshade/Textures/CRT-LUT-1.png deleted file mode 100644 index b5f1e5af1..000000000 Binary files a/data/resources/shaders/reshade/Textures/CRT-LUT-1.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/CRT-LUT-2.png b/data/resources/shaders/reshade/Textures/CRT-LUT-2.png deleted file mode 100644 index c9033bc58..000000000 Binary files a/data/resources/shaders/reshade/Textures/CRT-LUT-2.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/CRT-LUT-3.png b/data/resources/shaders/reshade/Textures/CRT-LUT-3.png deleted file mode 100644 index 604dc1988..000000000 Binary files a/data/resources/shaders/reshade/Textures/CRT-LUT-3.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/CRT-LUT-4.png b/data/resources/shaders/reshade/Textures/CRT-LUT-4.png deleted file mode 100644 index 323ec71c9..000000000 Binary files a/data/resources/shaders/reshade/Textures/CRT-LUT-4.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-cyclon/bezel.png b/data/resources/shaders/reshade/Textures/crt-cyclon/bezel.png deleted file mode 100644 index e654d8ae8..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-cyclon/bezel.png and /dev/null differ diff --git 
a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png deleted file mode 100644 index 2995ae5f4..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png deleted file mode 100644 index 2c3f21eed..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMask.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMask.png deleted file mode 100644 index ca4095649..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMask.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDP.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDP.png deleted file mode 100644 index a3844dc2a..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDP.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png deleted file mode 100644 index b61d92a0e..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png and /dev/null differ diff --git 
a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskResizeTo64.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskResizeTo64.png deleted file mode 100644 index 9b66ffba3..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearShadowMaskResizeTo64.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png deleted file mode 100644 index eb20b2316..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png b/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png deleted file mode 100644 index df518db57..000000000 Binary files a/data/resources/shaders/reshade/Textures/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png and /dev/null differ diff --git a/data/resources/shaders/reshade/Textures/overlay/OVERLAY_CREDITS_AND_LICENSE.md b/data/resources/shaders/reshade/Textures/overlay/OVERLAY_CREDITS_AND_LICENSE.md deleted file mode 100644 index bd322c094..000000000 --- a/data/resources/shaders/reshade/Textures/overlay/OVERLAY_CREDITS_AND_LICENSE.md +++ /dev/null @@ -1,15 +0,0 @@ -# To Use - -Choose Aspect Ratio: Stretch to Fill. - -# Psx.jpg Credits - -To the Author: SOQUEROEU. - -The "psx.jpg" background was edited from the one obtained from "Soqueroeu TV Backgrounds 2.0" repository: https://github.com/soqueroeu/Soqueroeu-TV-Backgrounds_V2.0/tree/main. 
- -The material is free to use according to the agreement below: - -## AGREEMENT - -This pack is free. You should not pay for anything related to this graphics pack and shader preset. You may distribute and reproduce part from this content, as long as you give credit to the authors involved. You may not profit from the sale of products that contain material in this package without the author's prior permission. diff --git a/data/resources/shaders/reshade/Textures/overlay/psx.jpg b/data/resources/shaders/reshade/Textures/overlay/psx.jpg deleted file mode 100644 index 2663817fb..000000000 Binary files a/data/resources/shaders/reshade/Textures/overlay/psx.jpg and /dev/null differ diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 8633de9ae..8bb639644 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -209,6 +209,29 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi } else if (texture_filter == GPUTextureFilter::JINC2 || texture_filter == GPUTextureFilter::JINC2BinAlpha) { + /* + Hyllian's jinc windowed-jinc 2-lobe sharper with anti-ringing Shader + + Copyright (C) 2011-2016 Hyllian/Jararaca - sergiogdb@gmail.com + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::JINC2BinAlpha); ss << R"( CONSTANT float JINC2_WINDOW_SINC = 0.44; @@ -361,6 +384,30 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi } else if (texture_filter == GPUTextureFilter::xBR || texture_filter == GPUTextureFilter::xBRBinAlpha) { + /* + Hyllian's xBR-vertex code and texel mapping + + Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ */ + DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::xBRBinAlpha); ss << R"( CONSTANT int BLEND_NONE = 0; diff --git a/src/duckstation-qt/duckstation-qt.vcxproj b/src/duckstation-qt/duckstation-qt.vcxproj index af91a4dd4..cb1864176 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj +++ b/src/duckstation-qt/duckstation-qt.vcxproj @@ -261,9 +261,6 @@ - - - diff --git a/src/duckstation-qt/duckstation-qt.vcxproj.filters b/src/duckstation-qt/duckstation-qt.vcxproj.filters index 3f57a9749..dd120bc34 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj.filters +++ b/src/duckstation-qt/duckstation-qt.vcxproj.filters @@ -290,9 +290,6 @@ - - - diff --git a/src/duckstation-qt/qt5.natvis b/src/duckstation-qt/qt5.natvis deleted file mode 100644 index d7d8854f2..000000000 --- a/src/duckstation-qt/qt5.natvis +++ /dev/null @@ -1,712 +0,0 @@ - - - - - - - - {{ x = {xp}, y = {yp} }} - - xp - yp - - - - - {{ x = {x1}, y = {y1}, width = {x2 - x1 + 1}, height = {y2 - y1 + 1} }} - - x1 - y1 - x2 - x1 + 1 - y2 - y1 + 1 - - - - - {{ x = {xp}, y = {yp}, width = {w}, height = {h} }} - - xp - yp - w - h - - - - - - {{ width = {wd}, height = {ht} }} - - wd - ht - - - - - - {{ start point = {pt1}, end point = {pt2} }} - - - {pt1} - - pt1 - - - - {pt2} - - pt2 - - - - - - - - {{ size = {d->size} }} - - d->ref.atomic._q_value - - d->size - (QPoint*)((reinterpret_cast<char*>(d)) + d->offset) - - - - - - {{ size = {d->size} }} - - - d->size > 0 - && ((((QPointF*)((reinterpret_cast<char*>(d)) + d->offset)[0]).xp - == (((QPointF*)((reinterpret_cast<char*>(d)) + d->offset)[d->size - 1]).xp) - && ((((QPointF*)((reinterpret_cast<char*>(d)) + d->offset)[0]).yp - == (((QPointF*)((reinterpret_cast<char*>(d)) + d->offset)[d->size - 1]).yp) - - d->ref.atomic._q_value - - d->size - (QPointF*)((reinterpret_cast<char*>(d)) + d->offset) - - - - - - {{ x = {xp}, y = {yp} }} - - xp - yp - - - - - {{ x = {xp}, y = {yp}, z = {zp} }} - - xp - yp - zp - - - - - {{ x = {xp}, y = {yp}, z = {zp}, 
w = {wp} }} - - xp - yp - zp - wp - - - - - - {{ m11 = {_m11}, m12 = {_m12}, m21 = {_m21}, m22 = {_m22}, ... }} - - - _m11 - _m12 - _m21 - _m22 - _dx - _dy - - - - - - {{ m11 = {m[0][0]}, m12 = {m[1][0]}, m13 = {m[2][0]}, m14 = {m[3][0]}, ... }} - - - m[0][0] - m[1][0] - m[2][0] - m[3][0] - m[0][1] - m[1][1] - m[2][1] - m[3][1] - m[0][2] - m[1][2] - m[2][2] - m[3][2] - m[0][3] - m[1][3] - m[2][3] - m[3][3] - - - - - - {{ horizontal = {static_cast<Policy>(bits.horPolicy)}, vertical = {static_cast<Policy>(bits.verPolicy)}, type = {ControlType(1 << bits.ctype)} }} - - - - QSizePolicy::Policy::{static_cast<Policy>(bits.verPolicy)} - - - QSizePolicy::Policy::{static_cast<Policy>(bits.horPolicy)} - - - QSizePolicy::ControlType::{ControlType(1 << bits.ctype)} - - - - Qt::Vertical (2) - - - Qt::Horizontal (1) - - - static_cast<int>(bits.verStretch) - static_cast<int>(bits.horStretch) - bits.hfw == 1 - bits.wfh == 1 - - - - - {ucs,c} - ucs,c - - ucs > 0xff ? '\0' : char(ucs),c - ucs,c - - - - - {((reinterpret_cast<unsigned short*>(d)) + d->offset / 2),sub} - ((reinterpret_cast<unsigned short*>(d)) + d->offset / 2),sub - - d->size - d->ref.atomic._q_value - - d->size - ((reinterpret_cast<unsigned short*>(d)) + d->offset / 2),c - - - - - - {((reinterpret_cast<char*>(d)) + d->offset),sb} - ((reinterpret_cast<char*>(d)) + d->offset),sb - - d->size - d->ref.atomic._q_value - - d->size - ((reinterpret_cast<char*>(d)) + d->offset),c - - - - - - {{ size = {(d.d->size << 3) - *((reinterpret_cast<char*>(d.d)) + d.d->offset)} }} - - d.d->ref.atomic._q_value - - (d.d->size << 3) - *((reinterpret_cast<char*>(d.d)) + d.d->offset) - - (*(reinterpret_cast<const unsigned char*>((reinterpret_cast<char*>(d.d)) + d.d->offset) + 1 - + ($i >> 3)) & (1 << ($i & 7))) != 0 - - - - - - - - {{ size = {s} }} - - a - - s - ptr - - - - - - {{ julian day = {jd} }} - - - - - {{ millisecond = {mds} }} - {{ milliseconds = {mds} }} - - mds / 3600000, d - mds / 3600000, d - (mds % 3600000) / 60000, d - (mds % 
3600000) / 60000, d - (mds / 1000) % 60, d - (mds / 1000) % 60, d - mds % 1000, d - mds % 1000, d - - - - - {d.pattern} - - - - - ref._q_value - - - - - strong reference to shared pointer of type {"$T1"} - - value == 0 - d->weakref._q_value - d->strongref._q_value - - - - - pointer to implicit shared object of type {"$T1"} - - d - - - - - pointer to explicit shared object of type {"$T1"} - - d - - - - - guarded pointer to subclass of QObject of type {"$T1"} - - wp.d == 0 || wp.d->strongref._q_value == 0 || wp.value == 0 - - - - - weak reference to shared pointer of type {"$T1"} - - d == 0 || d->strongref._q_value == 0 || value == 0 - d->weakref._q_value - d->strongref._q_value - - - - - scoped pointer to a dynamically allocated object of type {"$T1"} - - !d - - - - - scoped pointer to dynamically allocated array of objects of type {"$T1"} - - !d - - - - - ({first}, {second}) - - first - second - - - - - - {{ size = {d->size} }} - - d->ref.atomic._q_value - - d->size - ($T1*)((reinterpret_cast<char*>(d)) + d->offset) - - - - - - - - {{ size = {d->end - d->begin} }} - - d->ref.atomic._q_value - - d->end - d->begin - *reinterpret_cast<$T1*>((sizeof($T1) > sizeof(void*)) - ? 
reinterpret_cast<Node*>(d->array + d->begin + $i)->v - : reinterpret_cast<$T1*>(d->array + d->begin + $i)) - - - - - - - {{ size = {d->size} }} - - d->ref.atomic._q_value - - d->size - d->n - n - (*(QLinkedListNode<$T1>*)this).t - - - - - - ({key}, {value}) - - key - value - - - - - - {{ size = {d->size} }} - - d->ref.atomic._q_value - - d->size - d->header.left - left - right - *((QMapNode<$T1,$T2>*)this) - - - - - - (empty) - ({key}, {value}) - - key - value - - - - - - {{ size = {d->size} }} - - d->ref.atomic._q_value - - d->numBuckets - *((QHashNode<$T1,$T2>*)d->buckets[$i]) - - - - - - (empty) - ({key}) - - key - - - - - {{ size = {q_hash.d->size} }} - - q_hash - - - - - ({*keyPtr}, {*t}) - - *keyPtr - *t - - - - - {{ size = {hash.d->size} }} - - mx - total - hash.d->ref.atomic._q_value - - hash.d->size - f - n - *((Node*)this) - - - - - - - - Invalid - {d.data.b} - {d.data.i} - {d.data.u} - {d.data.ll} - {d.data.ull} - {d.data.d} - {d.data.c} - - {*((QMap<QString,QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QList<QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QString*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QStringList*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QByteArray*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QBitArray*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QDate*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QTime*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - DateTime - Url - Locale - - {*((QRect*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QRectF*)(d.is_shared ? 
d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QSize*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QSizeF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QLine*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QLineF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QPoint*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - - {*((QPointF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - RegExp - RegularExpression - - {*((QHash<QString,QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr)))} - - EasingCurve - Uuid - ModelIndex - LastCoreType - Font - Pixmap - Brush - Color - Palette - Image - Polygon - Region - Bitmap - Cursor - KeySequence - Pen - TextLength - TextFormat - Matrix - Transform - Matrix4x4 - Vector2D - Vector3D - Vector4D - Quaternion - PolygonF - Icon - LastGuiType - SizePolicy - UserType - LastType - - - - - - d.data.c - - - *((QString*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - - *((QByteArray*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - - - - - - - *((QMap<QString,QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QList<QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QString*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QStringList*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QByteArray*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QBitArray*)(d.is_shared ? 
d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QDate*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QTime*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QRect*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QRectF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QSize*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QSizeF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QLine*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QLineF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QPoint*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QPointF*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - *((QHash<QString,QVariant>*)(d.is_shared ? d.data.shared->ptr - : reinterpret_cast<const void *>(&d.data.ptr))) - - - - - - -