Merge pull request #11999 from Filoppi/post_process_fixes

Video: implement output resampling (upscaling/downscaling) methods
2023-08-18 20:33:09 +02:00 · 2023-08-18 20:33:09 +02:00 · 3441fe6efc
parent a9ec2a6e41 80b453082d
commit 3441fe6efc
10 changed files with 490 additions and 96 deletions
--- a/Data/Sys/Shaders/default_pre_post_process.glsl
+++ b/Data/Sys/Shaders/default_pre_post_process.glsl
@ -1,4 +1,6 @@
-// References:
+/***** COLOR CORRECTION *****/
+
+// Color Space references:
 // https://www.unravel.com.au/understanding-color-spaces

 // SMPTE 170M - BT.601 (NTSC-M) -> BT.709
@ -21,8 +23,8 @@ mat3 from_PAL = transpose(mat3(

 float3 LinearTosRGBGamma(float3 color)
 {
-	float a = 0.055;
-	
+	const float a = 0.055;
+
 	for (int i = 0; i < 3; ++i)
 	{
 		float x = color[i];
@ -36,17 +38,336 @@ float3 LinearTosRGBGamma(float3 color)
 	return color;
 }

+/***** COLOR SAMPLING *****/
+
+// Unfiltered (nearest neighbor) read of samp1 at the given coordinates, with the gamma de-applied:
+// rgb is raised to the power of "gamma", alpha is returned as sampled.
+float4 QuickSample(float3 uvw, float gamma)
+{
+#if 0 // Test sampling range
+	const float threshold = 0.00000001;
+	float2 resolution = GetResolution();
+	float2 xy = uvw.xy * resolution;
+	// Sampling outside the valid range, draw in yellow
+	bool outside_x = xy.x < (0.0 - threshold) || xy.x > (resolution.x + threshold);
+	bool outside_y = xy.y < (0.0 - threshold) || xy.y > (resolution.y + threshold);
+	if (outside_x || outside_y)
+		return float4(1.0, 1.0, 0.0, 1);
+	// Sampling at the edges, draw in purple
+	bool edge_x = xy.x < 1.0 || xy.x > (resolution.x - 1.0);
+	bool edge_y = xy.y < 1.0 || xy.y > (resolution.y - 1.0);
+	if (edge_x || edge_y)
+		return float4(0.5, 0, 0.5, 1);
+#endif
+
+	float4 texel = texture(samp1, uvw);
+	return float4(pow(texel.rgb, float3(gamma)), texel.a);
+}
+// Convenience overload: takes the 2D coordinates and the third coordinate (w) separately.
+float4 QuickSample(float2 uv, float w, float gamma)
+{
+	float3 uvw = float3(uv, w);
+	return QuickSample(uvw, gamma);
+}
+// Same as QuickSample(), but "xy" is expressed in source pixels:
+// it gets scaled by the inverse source resolution before sampling.
+float4 QuickSampleByPixel(float2 xy, float w, float gamma)
+{
+	float2 uv = xy * GetInvResolution();
+	return QuickSample(float3(uv, w), gamma);
+}
+
+/***** Bilinear Interpolation *****/
+
+// Manual (bi)linear filter: reads the 2x2 pixels around "uvw" one by one, so that the gamma
+// can be de-applied on each of them before they are blended together.
+float4 BilinearSample(float3 uvw, float gamma)
+{
+	// This mimics the (bi)linear filtering GPUs do in HW.
+	// Note that GPUs might natively filter red green and blue differently, but we don't do it.
+	// They might also use different filtering between upscaling and downscaling.
+
+	// Position in source pixels, shifted by half a pixel so that flooring it
+	// gives the first pixel of the 2x2 block.
+	float2 texel_pos = (uvw.xy * GetResolution()) - 0.5;
+	float2 base = floor(texel_pos);
+	// The fractional part is the blend weight towards the second column/row
+	float2 weight = fract(texel_pos);
+
+	// First row of the 2x2 block (sampled at the pixel centers)
+	float4 row0_a = QuickSampleByPixel(base + float2(0.5, 0.5), uvw.z, gamma);
+	float4 row0_b = QuickSampleByPixel(base + float2(1.5, 0.5), uvw.z, gamma);
+	// Second row of the 2x2 block
+	float4 row1_a = QuickSampleByPixel(base + float2(0.5, 1.5), uvw.z, gamma);
+	float4 row1_b = QuickSampleByPixel(base + float2(1.5, 1.5), uvw.z, gamma);
+
+	// Blend horizontally within each row first, then vertically between the two rows
+	float4 row0 = lerp(row0_a, row0_b, weight.x);
+	float4 row1 = lerp(row1_a, row1_b, weight.x);
+	return lerp(row0, row1, weight.y);
+}
+
+/***** Bicubic Interpolation *****/
+
+// Formula derived from:
+// https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters#Definition
+// Values from:
+// https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/#mitchell-netravali-bicubic
+// Other references:
+// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL
+// https://github.com/ValveSoftware/gamescope/pull/740
+// https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl
+//
+// Builds the coefficients matrix of a cubic filter from its B and C parameters.
+// Each group of 4 values below holds the coefficients of one power of t (the fractional
+// position between the two central taps), one value per tap. Multiplying the matrix
+// by (1, t, t^2, t^3), as CubicCoeffs() does, gives the weights of the 4 taps.
+#define CUBIC_COEFF_GEN(B, C) \
+	(mat4( \
+		/* t^0 */ ((B) / 6.0), (-(B) / 3.0 + 1.0), ((B) / 6.0), (0.0), \
+		/* t^1 */ (-(B) / 2.0 - (C)), (0.0), ((B) / 2.0 + (C)), (0.0), \
+		/* t^2 */ ((B) / 2.0 + 2.0 * (C)), (2.0 * (B) + (C) - 3.0), \
+		          (-5.0 * (B) / 2.0 - 2.0 * (C) + 3.0), (-(C)), \
+		/* t^3 */ (-(B) / 6.0 - (C)), (-3.0 * (B) / 2.0 - (C) + 2.0), \
+		          (3.0 * (B) / 2.0 + (C) - 2.0), ((B) / 6.0 + (C))))
+
+// Evaluates the cubic polynomials stored in "coeffs" at "t",
+// returning one weight per tap (4 in total).
+float4 CubicCoeffs(float t, mat4 coeffs)
+{
+	float t2 = t * t;
+	float t3 = t2 * t;
+	return coeffs * float4(1.0, t, t2, t3);
+}
+
+// Weighted sum of 4 colors, each one scaled by the matching component of "coeffs".
+float4 CubicMix(float4 c0, float4 c1, float4 c2, float4 c3, float4 coeffs)
+{
+	float4 mixed = c0 * coeffs.x;
+	mixed += c1 * coeffs.y;
+	mixed += c2 * coeffs.z;
+	mixed += c3 * coeffs.w;
+	return mixed;
+}
+
+// By Sam Belliveau. Public Domain license.
+// Simple 16 tap, gamma correct, implementation of bicubic filtering.
+// "coeffs" selects the flavor of the filter (see CUBIC_COEFF_GEN).
+float4 BicubicSample(float3 uvw, float gamma, mat4 coeffs)
+{
+	// Position in source pixels, split in its integer and fractional parts
+	float2 texel_pos = (uvw.xy * GetResolution()) - 0.5;
+	float2 base = floor(texel_pos);
+	float2 t = fract(texel_pos);
+
+	// Weights of the 4 taps along each axis
+	float4 weights_x = CubicCoeffs(t.x, coeffs);
+	float4 weights_y = CubicCoeffs(t.y, coeffs);
+
+	// Filter each of the 4 rows of the 4x4 pixels grid horizontally...
+	float4 row0 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, -0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+0.5, -0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+1.5, -0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+2.5, -0.5), uvw.z, gamma),
+		weights_x);
+
+	float4 row1 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +0.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +0.5), uvw.z, gamma),
+		weights_x);
+
+	float4 row2 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +1.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +1.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +1.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +1.5), uvw.z, gamma),
+		weights_x);
+
+	float4 row3 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +2.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +2.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +2.5), uvw.z, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +2.5), uvw.z, gamma),
+		weights_x);
+
+	// ...then filter the 4 results vertically
+	return CubicMix(row0, row1, row2, row3, weights_y);
+}
+
+/***** Sharp Bilinear Filtering *****/
+
+// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
+// by Themaister, Public Domain license
+// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
+// giving a sharper image than plain bilinear.
+// The prescale factor (N) is the source to window scale, rounded up, on the axis where it's largest.
+float4 SharpBilinearSample(float3 uvw, float gamma)
+{
+	float2 inv_source_size = GetInvResolution();
+	float2 window_size = GetWindowResolution();
+
+	// Position in source pixels, split in its integer and fractional parts
+	float2 texel_pos = uvw.xy * GetResolution();
+	float2 texel_base = floor(texel_pos);
+	float2 texel_frac = fract(texel_pos);
+
+	float2 axis_scale = float2(window_size.x * inv_source_size.x, window_size.y * inv_source_size.y);
+	float prescale = ceil(max(axis_scale.x, axis_scale.y));
+	// Half size of the central region of a texel that maps to its center (no blending)
+	float flat_range = 0.5 - (0.5 / prescale);
+
+	// Figure out where in the texel to sample to get correct pre-scaled bilinear:
+	// only the part of the distance from the center that exceeds the flat region
+	// is kept, and it's stretched by the prescale factor.
+	float2 from_center = texel_frac - 0.5;
+	float2 beyond_flat = from_center - clamp(from_center, -flat_range, flat_range);
+	float2 remapped_frac = (beyond_flat * prescale) + 0.5;
+
+	float2 remapped_texel = texel_base + remapped_frac;
+
+	return BilinearSample(float3(remapped_texel * inv_source_size, uvw.z), gamma);
+}
+
+/***** Area Sampling *****/
+
+// By Sam Belliveau and Filippo Tarpini. Public Domain license.
+// Effectively a more accurate sharp bilinear filter when upscaling,
+// that also works as a mathematically perfect downscale filter.
+// https://entropymine.com/imageworsener/pixelmixing/
+// https://github.com/obsproject/obs-studio/pull/1715
+// https://legacy.imagemagick.org/Usage/filter/
+//
+// Averages all the source pixels touched by the box that the target pixel covers on the source,
+// weighting each of them by how much of it is covered (horizontally and vertically).
+// The box is split in: 4 corner pixels, the edge pixels between them (first/last row and
+// first/last column) and the fully covered pixels in the middle.
+// At most "max_iterations" fully covered pixels are read per axis.
+float4 AreaSampling(float3 uvw, float gamma)
+{
+	// Determine the sizes of the source and target images.
+	float2 source_size = GetResolution();
+	float2 inverted_target_size = GetInvWindowResolution();
+
+	// Determine the range of the source image that the target pixel will cover.
+	// Workaround: shift the resolution by 1/4 pixel to align the results with other sampling algorithms,
+	// otherwise the results would be offset, and we'd be sampling from coordinates outside the valid range.
+	// NOTE(review): this was tuned while the last column/row of the box was being counted twice
+	// (see the pixel box size below), it might not be needed anymore, verify visually.
+	float2 adjusted_source_size = source_size - 0.25;
+	float2 range = adjusted_source_size * inverted_target_size;
+	float2 beg = (uvw.xy * adjusted_source_size) - (range * 0.5);
+	float2 end = beg + range;
+
+	// Compute the top-left and bottom-right corners of the pixel box.
+	float2 f_beg = floor(beg);
+	float2 f_end = floor(end);
+
+	// Compute how much of the start and end pixels are covered horizontally & vertically.
+	float area_w = 1.0 - fract(beg.x);
+	float area_n = 1.0 - fract(beg.y);
+	float area_e = fract(end.x);
+	float area_s = fract(end.y);
+
+	// Compute the areas of the corner pixels in the pixel box.
+	float area_nw = area_n * area_w;
+	float area_ne = area_n * area_e;
+	float area_sw = area_s * area_w;
+	float area_se = area_s * area_e;
+
+	// Initialize the color accumulator.
+	float4 avg_color = float4(0.0, 0.0, 0.0, 0.0);
+
+	// Prevents rounding errors due to the coordinates flooring above
+	const float2 offset = float2(0.5, 0.5);
+
+	// Accumulate corner pixels.
+	avg_color += area_nw * QuickSampleByPixel(float2(f_beg.x, f_beg.y) + offset, uvw.z, gamma);
+	avg_color += area_ne * QuickSampleByPixel(float2(f_end.x, f_beg.y) + offset, uvw.z, gamma);
+	avg_color += area_sw * QuickSampleByPixel(float2(f_beg.x, f_end.y) + offset, uvw.z, gamma);
+	avg_color += area_se * QuickSampleByPixel(float2(f_end.x, f_end.y) + offset, uvw.z, gamma);
+
+	// Determine the size of the pixel box, as in the number of columns/rows that lie strictly
+	// between the first (f_beg) and the last (f_end) one: these are the fully covered ones.
+	// The first and last columns/rows are only partially covered and are already accounted for
+	// by the corner and edge samples, so they must not be counted here (hence the "- 1"),
+	// otherwise the last one would be sampled twice, once of which with full weight.
+	// f_beg and f_end hold integer values, the 0.5 only protects the conversion from rounding errors.
+	// The count is clamped to zero for when the box begins and ends within the same pixel.
+	int x_range = max(int(f_end.x - f_beg.x + 0.5) - 1, 0);
+	int y_range = max(int(f_end.y - f_beg.y + 0.5) - 1, 0);
+
+	// Workaround to compile the shader with DX11/12.
+	// If this isn't done, it will complain that the loop could have too many iterations.
+	// This number should be enough to guarantee downscaling from very high to very small resolutions.
+	// Note that this number might be referenced in the UI.
+	const int max_iterations = 16;
+
+	// Fix up the average calculations in case we reached the upper limit
+	// (the ranges are also used below to compute the total sampled area).
+	x_range = min(x_range, max_iterations);
+	y_range = min(y_range, max_iterations);
+
+	// Accumulate top and bottom edge pixels.
+	for (int ix = 0; ix < max_iterations; ++ix)
+	{
+		if (ix < x_range)
+		{
+			float x = f_beg.x + 1.0 + float(ix);
+			avg_color += area_n * QuickSampleByPixel(float2(x, f_beg.y) + offset, uvw.z, gamma);
+			avg_color += area_s * QuickSampleByPixel(float2(x, f_end.y) + offset, uvw.z, gamma);
+		}
+	}
+
+	// Accumulate left and right edge pixels and all the pixels in between.
+	for (int iy = 0; iy < max_iterations; ++iy)
+	{
+		if (iy < y_range)
+		{
+			float y = f_beg.y + 1.0 + float(iy);
+
+			avg_color += area_w * QuickSampleByPixel(float2(f_beg.x, y) + offset, uvw.z, gamma);
+			avg_color += area_e * QuickSampleByPixel(float2(f_end.x, y) + offset, uvw.z, gamma);
+
+			// Fully covered pixels, they all have a weight of 1.
+			for (int ix = 0; ix < max_iterations; ++ix)
+			{
+				if (ix < x_range)
+				{
+					float x = f_beg.x + 1.0 + float(ix);
+					avg_color += QuickSampleByPixel(float2(x, y) + offset, uvw.z, gamma);
+				}
+			}
+		}
+	}
+
+	// Compute the area of the pixel box that was sampled
+	// (the sum of the weights of all the samples accumulated above).
+	float area_corners = area_nw + area_ne + area_sw + area_se;
+	float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e);
+	float area_center = float(x_range) * float(y_range);
+
+	// Return the normalized average color.
+	return avg_color / (area_corners + area_edges + area_center);
+}
+
+/***** Main Functions *****/
+
+// Returns an accurate (gamma corrected) sample of a gamma space texture,
+// filtered with the method selected by "resampling_method".
+// Outputs in linear space for simplicity.
+// Falls back to opaque black if the method isn't any of the ones handled below.
+float4 LinearGammaCorrectedSample(float gamma)
+{
+	float3 uvw = v_tex0;
+
+	// Bilinear (also covers any value below it)
+	if (resampling_method <= 1)
+		return BilinearSample(uvw, gamma);
+
+	// Bicubic: B-Spline
+	if (resampling_method == 2)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0, 0.0));
+
+	// Bicubic: Mitchell-Netravali
+	if (resampling_method == 3)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0 / 3.0, 1.0 / 3.0));
+
+	// Bicubic: Catmull-Rom
+	if (resampling_method == 4)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.5));
+
+	// Sharp Bilinear
+	if (resampling_method == 5)
+		return SharpBilinearSample(uvw, gamma);
+
+	// Area Sampling
+	if (resampling_method == 6)
+		return AreaSampling(uvw, gamma);
+
+	// Nearest Neighbor
+	if (resampling_method == 7)
+		return QuickSample(uvw, gamma);
+
+	// Bicubic: Hermite
+	if (resampling_method == 8)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.0));
+
+	return float4(0, 0, 0, 1);
+}
+
 void main()
 {
-	// Note: sampling in gamma space is "wrong" if the source
-	// and target resolution don't match exactly.
-	// Fortunately at the moment here they always should but to do this correctly,
-	// we'd need to sample from 4 pixels, de-apply the gamma from each of these,
-	// and then do linear sampling on their corrected value.
-	float4 color = Sample();
+	// This tries to fall back on GPU HW sampling if it can (it won't be gamma corrected).
+	bool raw_resampling = resampling_method <= 0;
+	bool needs_rescaling = GetResolution() != GetWindowResolution();

-	// Convert to linear space to do any other kind of operation
-	color.rgb = pow(color.rgb, float3(game_gamma));
+	bool needs_resampling = needs_rescaling && (OptionEnabled(hdr_output) || OptionEnabled(correct_gamma) || !raw_resampling);
+
+	float4 color;
+
+	if (needs_resampling)
+	{
+		// Doing linear sampling in "gamma space" on linear texture formats isn't correct.
+		// If the source and target resolutions don't match, the GPU will return a color
+		// that is the average of 4 gamma space colors, but gamma space colors can't be blended together,
+		// gamma needs to be de-applied first. This makes a big difference if colors change
+		// drastically between two pixels.
+
+		color = LinearGammaCorrectedSample(game_gamma);
+	}
+	else
+	{
+		// Default GPU HW sampling. Bilinear is identical to Nearest Neighbor if the input and output resolutions match.
+		if (needs_rescaling)
+			color = texture(samp0, v_tex0);
+		else
+			color = texture(samp1, v_tex0);
+
+		// Convert to linear before doing any other follow-up operations.
+		color.rgb = pow(color.rgb, float3(game_gamma));
+	}

 	if (OptionEnabled(correct_color_space))
 	{
@ -57,13 +378,13 @@ void main()
 		else if (game_color_space == 2)
 			color.rgb = color.rgb * from_PAL;
 	}
-	
+
 	if (OptionEnabled(hdr_output))
 	{
 		float hdr_paper_white = hdr_paper_white_nits / hdr_sdr_white_nits;
 		color.rgb *= hdr_paper_white;
 	}
-	
+
 	if (OptionEnabled(linear_space_output))
 	{
 		// Nothing to do here
--- a/Data/Sys/Shaders/sharp_bilinear.glsl
+++ b/Data/Sys/Shaders/sharp_bilinear.glsl
@ -1,47 +0,0 @@
-// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
-// by Themaister, Public Domain license
-// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
-// giving a sharper image than plain bilinear.
-
-/*
-[configuration]
-[OptionRangeFloat]
-GUIName = Prescale Factor (set to 0 for automatic)
-OptionName = PRESCALE_FACTOR
-MinValue = 0.0
-MaxValue = 16.0
-StepAmount = 1.0
-DefaultValue = 0.0
-[/configuration]
-*/
-
-float CalculatePrescale(float config_scale) {
-  if (config_scale == 0.0) {
-    float2 source_size = GetResolution();
-    float2 window_size = GetWindowResolution();
-    return ceil(max(window_size.x / source_size.x, window_size.y / source_size.y));
-  } else {
-    return config_scale;
-  }
-}
-
-void main()
-{
-  float2 source_size = GetResolution();
-  float2 texel = GetCoordinates() * source_size;
-  float2 texel_floored = floor(texel);
-  float2 s = fract(texel);
-  float config_scale = GetOption(PRESCALE_FACTOR);
-  float scale = CalculatePrescale(config_scale);
-  float region_range = 0.5 - 0.5 / scale;
-
-  // Figure out where in the texel to sample to get correct pre-scaled bilinear.
-  // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
-
-  float2 center_dist = s - 0.5;
-  float2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
-
-  float2 mod_texel = texel_floored + f;
-
-  SetOutput(SampleLocation(mod_texel / source_size));
-}
--- a/Source/Core/Core/Config/GraphicsSettings.cpp
+++ b/Source/Core/Core/Config/GraphicsSettings.cpp
@ -118,6 +118,8 @@ const Info<std::string> GFX_DRIVER_LIB_NAME{{System::GFX, "Settings", "DriverLib
 const Info<TextureFilteringMode> GFX_ENHANCE_FORCE_TEXTURE_FILTERING{
    {System::GFX, "Enhancements", "ForceTextureFiltering"}, TextureFilteringMode::Default};
 const Info<int> GFX_ENHANCE_MAX_ANISOTROPY{{System::GFX, "Enhancements", "MaxAnisotropy"}, 0};
+const Info<OutputResamplingMode> GFX_ENHANCE_OUTPUT_RESAMPLING{
+    {System::GFX, "Enhancements", "OutputResampling"}, OutputResamplingMode::Default};
 const Info<std::string> GFX_ENHANCE_POST_SHADER{
    {System::GFX, "Enhancements", "PostProcessingShader"}, ""};
 const Info<bool> GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"},
--- a/Source/Core/Core/Config/GraphicsSettings.h
+++ b/Source/Core/Core/Config/GraphicsSettings.h
@ -11,6 +11,7 @@ enum class AspectMode : int;
 enum class ShaderCompilationMode : int;
 enum class StereoMode : int;
 enum class TextureFilteringMode : int;
+enum class OutputResamplingMode : int;
 enum class ColorCorrectionRegion : int;
 enum class TriState : int;

@ -101,6 +102,7 @@ extern const Info<bool> GFX_MODS_ENABLE;

 extern const Info<TextureFilteringMode> GFX_ENHANCE_FORCE_TEXTURE_FILTERING;
 extern const Info<int> GFX_ENHANCE_MAX_ANISOTROPY;  // NOTE - this is x in (1 << x)
+extern const Info<OutputResamplingMode> GFX_ENHANCE_OUTPUT_RESAMPLING;
 extern const Info<std::string> GFX_ENHANCE_POST_SHADER;
 extern const Info<bool> GFX_ENHANCE_FORCE_TRUE_COLOR;
 extern const Info<bool> GFX_ENHANCE_DISABLE_COPY_FILTER;
--- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp
+++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp
@ -105,6 +105,22 @@ void EnhancementsWidget::CreateWidgets()
  m_texture_filtering_combo->addItem(tr("Force Linear and 16x Anisotropic"),
                                     TEXTURE_FILTERING_FORCE_LINEAR_ANISO_16X);

+  m_output_resampling_combo = new ToolTipComboBox();
+  m_output_resampling_combo->addItem(tr("Default"),
+                                     static_cast<int>(OutputResamplingMode::Default));
+  m_output_resampling_combo->addItem(tr("Bilinear"),
+                                     static_cast<int>(OutputResamplingMode::Bilinear));
+  m_output_resampling_combo->addItem(tr("Bicubic: B-Spline"),
+                                     static_cast<int>(OutputResamplingMode::BSpline));
+  m_output_resampling_combo->addItem(tr("Bicubic: Mitchell-Netravali"),
+                                     static_cast<int>(OutputResamplingMode::MitchellNetravali));
+  m_output_resampling_combo->addItem(tr("Bicubic: Catmull-Rom"),
+                                     static_cast<int>(OutputResamplingMode::CatmullRom));
+  m_output_resampling_combo->addItem(tr("Sharp Bilinear"),
+                                     static_cast<int>(OutputResamplingMode::SharpBilinear));
+  m_output_resampling_combo->addItem(tr("Area Sampling"),
+                                     static_cast<int>(OutputResamplingMode::AreaSampling));
+
  m_configure_color_correction = new ToolTipPushButton(tr("Configure"));

  m_pp_effect = new ToolTipComboBox();
@ -136,6 +152,10 @@ void EnhancementsWidget::CreateWidgets()
  enhancements_layout->addWidget(m_texture_filtering_combo, row, 1, 1, -1);
  ++row;

+  enhancements_layout->addWidget(new QLabel(tr("Output Resampling:")), row, 0);
+  enhancements_layout->addWidget(m_output_resampling_combo, row, 1, 1, -1);
+  ++row;
+
  enhancements_layout->addWidget(new QLabel(tr("Color Correction:")), row, 0);
  enhancements_layout->addWidget(m_configure_color_correction, row, 1, 1, -1);
  ++row;
@ -195,6 +215,8 @@ void EnhancementsWidget::ConnectWidgets()
          [this](int) { SaveSettings(); });
  connect(m_texture_filtering_combo, qOverload<int>(&QComboBox::currentIndexChanged),
          [this](int) { SaveSettings(); });
+  connect(m_output_resampling_combo, qOverload<int>(&QComboBox::currentIndexChanged),
+          [this](int) { SaveSettings(); });
  connect(m_pp_effect, qOverload<int>(&QComboBox::currentIndexChanged),
          [this](int) { SaveSettings(); });
  connect(m_3d_mode, qOverload<int>(&QComboBox::currentIndexChanged), [this] {
@ -325,6 +347,14 @@ void EnhancementsWidget::LoadSettings()
    break;
  }

+  // Resampling
+  const OutputResamplingMode output_resampling_mode =
+      Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING);
+  m_output_resampling_combo->setCurrentIndex(static_cast<int>(output_resampling_mode));
+
+  m_output_resampling_combo->setEnabled(g_Config.backend_info.bSupportsPostProcessing);
+
+  // Color Correction
  m_configure_color_correction->setEnabled(g_Config.backend_info.bSupportsPostProcessing);

  // Post Processing Shader
@ -413,6 +443,10 @@ void EnhancementsWidget::SaveSettings()
    break;
  }

+  const int output_resampling_selection = m_output_resampling_combo->currentData().toInt();
+  Config::SetBaseOrCurrent(Config::GFX_ENHANCE_OUTPUT_RESAMPLING,
+                           static_cast<OutputResamplingMode>(output_resampling_selection));
+
  const bool anaglyph = g_Config.stereo_mode == StereoMode::Anaglyph;
  const bool passive = g_Config.stereo_mode == StereoMode::Passive;
  Config::SetBaseOrCurrent(Config::GFX_ENHANCE_POST_SHADER,
@ -455,6 +489,37 @@ void EnhancementsWidget::AddDescriptions()
      "scaling filter selected by the game.<br><br>Any option except 'Default' will alter the look "
      "of the game's textures and might cause issues in a small number of "
      "games.<br><br><dolphin_emphasis>If unsure, select 'Default'.</dolphin_emphasis>");
+  static const char TR_OUTPUT_RESAMPLING_DESCRIPTION[] =
+      QT_TR_NOOP("Affects how the game output is scaled to the window resolution."
+                 "<br>The performance mostly depends on the number of samples each method uses."
+                 "<br>Compared to SSAA, resampling is useful in case the output window"
+                 "<br>resolution isn't a multiplier of the native emulation resolution."
+
+                 "<br><br><b>Default</b> - [fastest]"
+                 "<br>Internal GPU bilinear sampler which is not gamma corrected."
+                 "<br>This setting might be ignored if gamma correction is forced on."
+
+                 "<br><br><b>Bilinear</b> - [4 samples]"
+                 "<br>Gamma corrected linear interpolation between pixels."
+
+                 "<br><br><b>Bicubic</b> - [16 samples]"
+                 "<br>Gamma corrected cubic interpolation between pixels."
+                 "<br>Good when rescaling between close resolutions. i.e 1080p and 1440p."
+                 "<br>Comes in various flavors:"
+                 "<br><b>B-Spline</b>: Blurry, but avoids all lobing artifacts"
+                 "<br><b>Mitchell-Netravali</b>: Good middle ground between blurry and lobing"
+                 "<br><b>Catmull-Rom</b>: Sharper, but can cause lobing artifacts"
+
+                 "<br><br><b>Sharp Bilinear</b> - [1-4 samples]"
+                 "<br>Similarly to \"Nearest Neighbor\", it maintains a sharp look,"
+                 "<br>but also does some blending to avoid shimmering."
+                 "<br>Works best with 2D games at low resolutions."
+
+                 "<br><br><b>Area Sampling</b> - [up to 324 samples]"
+                 "<br>Weights pixels by the percentage of area they occupy. Gamma corrected."
+                 "<br>Best for down scaling by more than 2x."
+
+                 "<br><br><dolphin_emphasis>If unsure, select 'Default'.</dolphin_emphasis>");
  static const char TR_COLOR_CORRECTION_DESCRIPTION[] =
      QT_TR_NOOP("A group of features to make the colors more accurate, matching the color space "
                 "Wii and GC games were meant for.");
@ -537,6 +602,9 @@ void EnhancementsWidget::AddDescriptions()
  m_texture_filtering_combo->SetTitle(tr("Texture Filtering"));
  m_texture_filtering_combo->SetDescription(tr(TR_FORCE_TEXTURE_FILTERING_DESCRIPTION));

+  m_output_resampling_combo->SetTitle(tr("Output Resampling"));
+  m_output_resampling_combo->SetDescription(tr(TR_OUTPUT_RESAMPLING_DESCRIPTION));
+
  m_configure_color_correction->SetTitle(tr("Color Correction"));
  m_configure_color_correction->SetDescription(tr(TR_COLOR_CORRECTION_DESCRIPTION));

--- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h
+++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h
@ -39,6 +39,7 @@ private:
  ConfigChoice* m_ir_combo;
  ToolTipComboBox* m_aa_combo;
  ToolTipComboBox* m_texture_filtering_combo;
+  ToolTipComboBox* m_output_resampling_combo;
  ToolTipComboBox* m_pp_effect;
  ToolTipPushButton* m_configure_color_correction;
  QPushButton* m_configure_pp_effect;
--- a/Source/Core/VideoCommon/PostProcessing.cpp
+++ b/Source/Core/VideoCommon/PostProcessing.cpp
@ -419,9 +419,9 @@ std::vector<std::string> PostProcessing::GetPassiveShaderList()
 bool PostProcessing::Initialize(AbstractTextureFormat format)
 {
  m_framebuffer_format = format;
-  // CompilePixelShader must be run first if configuration options are used.
+  // CompilePixelShader() must be run first if configuration options are used.
  // Otherwise the UBO has a different member list between vertex and pixel
-  // shaders, which is a link error.
+  // shaders, which is a link error on some backends.
  if (!CompilePixelShader() || !CompileVertexShader() || !CompilePipeline())
    return false;

@ -486,23 +486,29 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,

  MathUtil::Rectangle<int> src_rect = src;
  g_gfx->SetSamplerState(0, RenderState::GetLinearSamplerState());
+  g_gfx->SetSamplerState(1, RenderState::GetPointSamplerState());
  g_gfx->SetTexture(0, src_tex);
+  g_gfx->SetTexture(1, src_tex);

-  const bool is_color_correction_active = IsColorCorrectionActive();
+  const bool needs_color_correction = IsColorCorrectionActive();
+  // Rely on the default (bi)linear sampler with the default mode
+  // (it might not be gamma corrected).
+  const bool needs_resampling =
+      g_ActiveConfig.output_resampling_mode > OutputResamplingMode::Default;
  const bool needs_intermediary_buffer = NeedsIntermediaryBuffer();
+  const bool needs_default_pipeline = needs_color_correction || needs_resampling;
  const AbstractPipeline* final_pipeline = m_pipeline.get();
  std::vector<u8>* uniform_staging_buffer = &m_default_uniform_staging_buffer;
  bool default_uniform_staging_buffer = true;
+  const MathUtil::Rectangle<int> present_rect = g_presenter->GetTargetRectangle();

  // Intermediary pass.
-  // We draw to a high quality intermediary texture for two reasons:
+  // We draw to a high quality intermediary texture for a couple reasons:
+  // -Consistently do high quality gamma corrected resampling (upscaling/downscaling)
  // -Keep quality for gamma and gamut conversions, and HDR output
  //  (low bit depths lose too much quality with gamma conversions)
-  // -We make a texture of the exact same res as the source one,
-  //  because all the post process shaders we already had assume that
-  //  the source texture size (EFB) is different from the swap chain
-  //  texture size (which matches the window size).
-  if (m_default_pipeline && is_color_correction_active && needs_intermediary_buffer)
+  // -Keep the post process phase in linear space, to better operate with colors
+  if (m_default_pipeline && needs_default_pipeline && needs_intermediary_buffer)
  {
    AbstractFramebuffer* const previous_framebuffer = g_gfx->GetCurrentFramebuffer();

@ -512,13 +518,18 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
    // so it would be a waste to allocate two layers (see "bUsesExplictQuadBuffering").
    const u32 target_layers = copy_all_layers ? src_tex->GetLayers() : 1;

+    const u32 target_width =
+        needs_resampling ? present_rect.GetWidth() : static_cast<u32>(src_rect.GetWidth());
+    const u32 target_height =
+        needs_resampling ? present_rect.GetHeight() : static_cast<u32>(src_rect.GetHeight());
+
    if (!m_intermediary_frame_buffer || !m_intermediary_color_texture ||
-        m_intermediary_color_texture.get()->GetWidth() != static_cast<u32>(src_rect.GetWidth()) ||
-        m_intermediary_color_texture.get()->GetHeight() != static_cast<u32>(src_rect.GetHeight()) ||
+        m_intermediary_color_texture.get()->GetWidth() != target_width ||
+        m_intermediary_color_texture.get()->GetHeight() != target_height ||
        m_intermediary_color_texture.get()->GetLayers() != target_layers)
    {
      const TextureConfig intermediary_color_texture_config(
-          src_rect.GetWidth(), src_rect.GetHeight(), 1, target_layers, src_tex->GetSamples(),
+          target_width, target_height, 1, target_layers, src_tex->GetSamples(),
          s_intermediary_buffer_format, AbstractTextureFlag_RenderTarget);
      m_intermediary_color_texture = g_gfx->CreateTexture(intermediary_color_texture_config,
                                                          "Intermediary post process texture");
@ -530,8 +541,8 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
    g_gfx->SetFramebuffer(m_intermediary_frame_buffer.get());

    FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(),
-                      g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(),
-                      !default_uniform_staging_buffer);
+                      present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer,
+                      true);
    g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(),
                                            static_cast<u32>(uniform_staging_buffer->size()));

@ -544,6 +555,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
    src_rect = m_intermediary_color_texture->GetRect();
    src_tex = m_intermediary_color_texture.get();
    g_gfx->SetTexture(0, src_tex);
+    g_gfx->SetTexture(1, src_tex);
    // The "m_intermediary_color_texture" has already copied
    // from the specified source layer onto its first one.
    // If we query for a layer that the source texture doesn't have,
@ -557,7 +569,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
    // If we have no custom user shader selected, and color correction
    // is active, directly run the fixed pipeline shader instead of
    // doing two passes, with the second one doing nothing useful.
-    if (m_default_pipeline && is_color_correction_active)
+    if (m_default_pipeline && needs_default_pipeline)
    {
      final_pipeline = m_default_pipeline.get();
    }
@ -580,8 +592,8 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
  if (final_pipeline)
  {
    FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(),
-                      g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(),
-                      !default_uniform_staging_buffer);
+                      present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer,
+                      false);
    g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(),
                                            static_cast<u32>(uniform_staging_buffer->size()));

@ -609,7 +621,11 @@ std::string PostProcessing::GetUniformBufferHeader(bool user_post_process) const
  // The first (but not necessarily only) source layer we target
  ss << "  int src_layer;\n";
  ss << "  uint time;\n";
+  ss << "  int graphics_api;\n";
+  // Non-zero for an intermediary buffer (including the first one), zero for the final one
+  ss << "  int intermediary_buffer;\n";

+  ss << "  int resampling_method;\n";
  ss << "  int correct_color_space;\n";
  ss << "  int game_color_space;\n";
  ss << "  int correct_gamma;\n";
@ -742,6 +758,7 @@ void SetOutput(float4 color)

 #define GetOption(x) (x)
 #define OptionEnabled(x) ((x) != 0)
+#define OptionDisabled(x) ((x) == 0)

 )";
  return ss.str();
@ -752,13 +769,9 @@ std::string PostProcessing::GetFooter() const
  return {};
 }

-bool PostProcessing::CompileVertexShader()
+std::string GetVertexShaderBody()
 {
  std::ostringstream ss;
-  // We never need the user selected post process custom uniforms in the vertex shader
-  const bool user_post_process = false;
-  ss << GetUniformBufferHeader(user_post_process);
-
  if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
  {
    ss << "VARYING_LOCATION(0) out VertexData {\n";
@ -779,21 +792,34 @@ bool PostProcessing::CompileVertexShader()

  // Vulkan Y needs to be inverted on every pass
  if (g_ActiveConfig.backend_info.api_type == APIType::Vulkan)
+  {
    ss << "  opos.y = -opos.y;\n";
-
-  std::string s2 = ss.str();
-  s2 += "}\n";
-  m_default_vertex_shader = g_gfx->CreateShaderFromSource(ShaderStage::Vertex, s2,
-                                                          "Default post-processing vertex shader");
-
-  // OpenGL Y needs to be inverted once only (in the last pass)
-  if (g_ActiveConfig.backend_info.api_type == APIType::OpenGL)
-    ss << "  opos.y = -opos.y;\n";
+  }
+  // OpenGL Y needs to be inverted in all passes except the last one
+  else if (g_ActiveConfig.backend_info.api_type == APIType::OpenGL)
+  {
+    ss << "  if (intermediary_buffer != 0)\n";
+    ss << "    opos.y = -opos.y;\n";
+  }

  ss << "}\n";
+  return ss.str();
+}

+bool PostProcessing::CompileVertexShader()
+{
+  std::ostringstream ss_default;
+  ss_default << GetUniformBufferHeader(false);
+  ss_default << GetVertexShaderBody();
+  m_default_vertex_shader = g_gfx->CreateShaderFromSource(ShaderStage::Vertex, ss_default.str(),
+                                                          "Default post-processing vertex shader");
+
+  std::ostringstream ss;
+  ss << GetUniformBufferHeader(true);
+  ss << GetVertexShaderBody();
  m_vertex_shader =
      g_gfx->CreateShaderFromSource(ShaderStage::Vertex, ss.str(), "Post-processing vertex shader");
+
  if (!m_default_vertex_shader || !m_vertex_shader)
  {
    PanicAlertFmt("Failed to compile post-processing vertex shader");
@ -816,6 +842,9 @@ struct BuiltinUniforms
  std::array<float, 4> src_rect;
  s32 src_layer;
  u32 time;
+  s32 graphics_api;
+  s32 intermediary_buffer;
+  s32 resampling_method;
  s32 correct_color_space;
  s32 game_color_space;
  s32 correct_gamma;
@ -839,7 +868,7 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle<int>& src,
                                       const AbstractTexture* src_tex, int src_layer,
                                       const MathUtil::Rectangle<int>& dst,
                                       const MathUtil::Rectangle<int>& wnd, u8* buffer,
-                                       bool user_post_process)
+                                       bool user_post_process, bool intermediary_buffer)
 {
  const float rcp_src_width = 1.0f / src_tex->GetWidth();
  const float rcp_src_height = 1.0f / src_tex->GetHeight();
@ -860,7 +889,10 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle<int>& src,
                               static_cast<float>(src.GetHeight()) * rcp_src_height};
  builtin_uniforms.src_layer = static_cast<s32>(src_layer);
  builtin_uniforms.time = static_cast<u32>(m_timer.ElapsedMs());
+  builtin_uniforms.graphics_api = static_cast<s32>(g_ActiveConfig.backend_info.api_type);
+  builtin_uniforms.intermediary_buffer = static_cast<s32>(intermediary_buffer);

+  builtin_uniforms.resampling_method = static_cast<s32>(g_ActiveConfig.output_resampling_mode);
  // Color correction related uniforms.
  // These are mainly used by the "m_default_pixel_shader",
  // but should also be accessible to all other shaders.
@ -883,6 +915,8 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle<int>& src,
  std::memcpy(buffer, &builtin_uniforms, sizeof(builtin_uniforms));
  buffer += sizeof(builtin_uniforms);

+  // Don't include the custom pp shader options if they are not necessary:
+  // having mismatching uniforms between different shaders can cause issues on some backends.
  if (!user_post_process)
    return;

@ -1000,8 +1034,7 @@ bool PostProcessing::CompilePipeline()
  const bool needs_intermediary_buffer = NeedsIntermediaryBuffer();

  AbstractPipelineConfig config = {};
-  config.vertex_shader =
-      needs_intermediary_buffer ? m_vertex_shader.get() : m_default_vertex_shader.get();
+  config.vertex_shader = m_default_vertex_shader.get();
  // This geometry shader will take care of reading both layer 0 and 1 on the source texture,
  // and writing to both layer 0 and 1 on the render target.
  config.geometry_shader = UseGeometryShaderForPostProcess(needs_intermediary_buffer) ?
@ -1018,7 +1051,7 @@ bool PostProcessing::CompilePipeline()
  if (config.pixel_shader)
    m_default_pipeline = g_gfx->CreatePipeline(config);

-  config.vertex_shader = m_default_vertex_shader.get();
+  config.vertex_shader = m_vertex_shader.get();
  config.geometry_shader = UseGeometryShaderForPostProcess(false) ?
                               g_shader_cache->GetTexcoordGeometryShader() :
                               nullptr;
--- a/Source/Core/VideoCommon/PostProcessing.h
+++ b/Source/Core/VideoCommon/PostProcessing.h
@ -124,7 +124,8 @@ protected:
  size_t CalculateUniformsSize(bool user_post_process) const;
  void FillUniformBuffer(const MathUtil::Rectangle<int>& src, const AbstractTexture* src_tex,
                         int src_layer, const MathUtil::Rectangle<int>& dst,
-                         const MathUtil::Rectangle<int>& wnd, u8* buffer, bool user_post_process);
+                         const MathUtil::Rectangle<int>& wnd, u8* buffer, bool user_post_process,
+                         bool intermediary_buffer);

  // Timer for determining our time value
  Common::Timer m_timer;
--- a/Source/Core/VideoCommon/VideoConfig.cpp
+++ b/Source/Core/VideoCommon/VideoConfig.cpp
@ -133,6 +133,7 @@ void VideoConfig::Refresh()

  texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
  iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
+  output_resampling_mode = Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING);
  sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER);
  bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR);
  bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER);
--- a/Source/Core/VideoCommon/VideoConfig.h
+++ b/Source/Core/VideoCommon/VideoConfig.h
@ -52,6 +52,17 @@ enum class TextureFilteringMode : int
  Linear,
 };

+enum class OutputResamplingMode : int  // Type of VideoConfig::output_resampling_mode
+{
+  Default,  // First enumerator (0); also the initial value of VideoConfig::output_resampling_mode
+  Bilinear,
+  BSpline,
+  MitchellNetravali,
+  CatmullRom,
+  SharpBilinear,
+  AreaSampling,  // NOTE: the value is cast to s32 for the "resampling_method" uniform, so reordering changes what shaders receive
+};
+
 enum class ColorCorrectionRegion : int
 {
  SMPTE_NTSCM,
@ -103,6 +114,7 @@ struct VideoConfig final
  bool bSSAA = false;
  int iEFBScale = 0;
  TextureFilteringMode texture_filtering_mode = TextureFilteringMode::Default;
+  OutputResamplingMode output_resampling_mode = OutputResamplingMode::Default;
  int iMaxAnisotropy = 0;
  std::string sPostProcessingShader;
  bool bForceTrueColor = false;