From 9e798eec94a8ebe94c1ef8270b504b5698b2a0f5 Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Sun, 29 Apr 2018 18:52:30 +1000
Subject: [PATCH] Implement EFB copy filter and gamma in hardware backends

Also makes y_scale a dynamic parameter for EFB copies. Keeping it as part
of the uid doesn't make sense, as it only causes redundant shaders to be
generated.
---
 Source/Core/Core/Config/GraphicsSettings.cpp  |   2 +
 Source/Core/Core/Config/GraphicsSettings.h    |   1 +
 .../Core/ConfigLoaders/IsSettingSaveable.cpp  |   1 +
 .../VideoBackends/D3D/PSTextureEncoder.cpp    |  24 +-
 .../Core/VideoBackends/D3D/PSTextureEncoder.h |   6 +-
 Source/Core/VideoBackends/D3D/Render.cpp      |  17 +-
 Source/Core/VideoBackends/D3D/Render.h        |   4 +-
 .../Core/VideoBackends/D3D/TextureCache.cpp   |  47 +++-
 Source/Core/VideoBackends/D3D/TextureCache.h  |  10 +-
 Source/Core/VideoBackends/Null/Render.cpp     |   2 +-
 Source/Core/VideoBackends/Null/Render.h       |   2 +-
 Source/Core/VideoBackends/Null/TextureCache.h |   7 +-
 Source/Core/VideoBackends/OGL/Render.cpp      |   3 +-
 Source/Core/VideoBackends/OGL/Render.h        |   2 +-
 .../Core/VideoBackends/OGL/TextureCache.cpp   |  30 ++-
 Source/Core/VideoBackends/OGL/TextureCache.h  |  12 +-
 .../VideoBackends/OGL/TextureConverter.cpp    |  19 +-
 .../Core/VideoBackends/OGL/TextureConverter.h |  11 +-
 .../VideoBackends/Software/EfbInterface.cpp   |  33 ++-
 .../VideoBackends/Software/EfbInterface.h     |   3 +-
 .../VideoBackends/Software/SWRenderer.cpp     |   3 +-
 .../Core/VideoBackends/Software/SWRenderer.h  |   2 +-
 .../VideoBackends/Software/TextureCache.h     |   9 +-
 .../VideoBackends/Software/TextureEncoder.cpp |   9 +-
 .../VideoBackends/Software/TextureEncoder.h   |   6 +-
 Source/Core/VideoBackends/Vulkan/Renderer.cpp |   3 +-
 Source/Core/VideoBackends/Vulkan/Renderer.h   |   2 +-
 .../VideoBackends/Vulkan/TextureCache.cpp     |  38 ++-
 .../Core/VideoBackends/Vulkan/TextureCache.h  |   8 +-
 .../VideoBackends/Vulkan/TextureConverter.cpp |  27 +-
 .../VideoBackends/Vulkan/TextureConverter.h   |   9 +-
 Source/Core/VideoCommon/BPMemory.h            |   4 +-
 Source/Core/VideoCommon/BPStructs.cpp         |  12 +-
 Source/Core/VideoCommon/RenderBase.cpp        |   2 +-
 Source/Core/VideoCommon/RenderBase.h          |   3 +-
 Source/Core/VideoCommon/TextureCacheBase.cpp  |  49 +++-
 Source/Core/VideoCommon/TextureCacheBase.h    |  31 ++-
 .../VideoCommon/TextureConversionShader.cpp   | 245 +++++++++++-------
 .../VideoCommon/TextureConverterShaderGen.cpp |  62 +++--
 Source/Core/VideoCommon/VideoConfig.cpp       |   1 +
 Source/Core/VideoCommon/VideoConfig.h         |   1 +
 41 files changed, 526 insertions(+), 236 deletions(-)
diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp
index f46c7a585a..cfae1b14f2 100644
--- a/Source/Core/Core/Config/GraphicsSettings.cpp
+++ b/Source/Core/Core/Config/GraphicsSettings.cpp
@@ -106,6 +106,8 @@ const ConfigInfo<std::string> GFX_ENHANCE_POST_SHADER{
     {System::GFX, "Enhancements", "PostProcessingShader"}, ""};
 const ConfigInfo<bool> GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"},
                                                     true};
+const ConfigInfo<bool> GFX_ENHANCE_DISABLE_COPY_FILTER{
+    {System::GFX, "Enhancements", "DisableCopyFilter"}, true};
 
 // Graphics.Stereoscopy
 
diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h
index 4f0e4f741c..6b4c900e01 100644
--- a/Source/Core/Core/Config/GraphicsSettings.h
+++ b/Source/Core/Core/Config/GraphicsSettings.h
@@ -80,6 +80,7 @@ extern const ConfigInfo<bool> GFX_ENHANCE_FORCE_FILTERING;
 extern const ConfigInfo<int> GFX_ENHANCE_MAX_ANISOTROPY;  // NOTE - this is x in (1 << x)
 extern const ConfigInfo<std::string> GFX_ENHANCE_POST_SHADER;
 extern const ConfigInfo<bool> GFX_ENHANCE_FORCE_TRUE_COLOR;
+extern const ConfigInfo<bool> GFX_ENHANCE_DISABLE_COPY_FILTER;
 
 // Graphics.Stereoscopy
 
diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp
index 7b92427749..56b3ec8c74 100644
--- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp
+++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp
@@ -85,6 +85,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
       Config::GFX_ENHANCE_MAX_ANISOTROPY.location,
       Config::GFX_ENHANCE_POST_SHADER.location,
       Config::GFX_ENHANCE_FORCE_TRUE_COLOR.location,
+      Config::GFX_ENHANCE_DISABLE_COPY_FILTER.location,
 
       // Graphics.Stereoscopy
 
diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
index 8d48332af7..de0d9716ec 100644
--- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
+++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
@@ -31,7 +31,11 @@ struct EFBEncodeParams
   u32 DestWidth;
   u32 ScaleFactor;
   float y_scale;
-  u32 padding[3];
+  float gamma_rcp;
+  float clamp_top;
+  float clamp_bottom;
+  s32 filter_coefficients[3];
+  u32 padding;
 };
 
 PSTextureEncoder::PSTextureEncoder()
@@ -66,9 +70,11 @@ void PSTextureEncoder::Shutdown()
   SAFE_RELEASE(m_encode_params);
 }
 
-void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width,
-                              u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                              const EFBRectangle& src_rect, bool scale_by_half)
+void PSTextureEncoder::Encode(
+    u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
+    u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma,
+    bool clamp_top, bool clamp_bottom,
+    const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
 {
   // Resolve MSAA targets before copying.
   // FIXME: Instead of resolving EFB, it would be better to pick out a
@@ -101,7 +107,13 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w
     encode_params.SrcTop = src_rect.top;
     encode_params.DestWidth = native_width;
     encode_params.ScaleFactor = scale_by_half ? 2 : 1;
-    encode_params.y_scale = params.y_scale;
+    encode_params.y_scale = y_scale;
+    encode_params.gamma_rcp = 1.0f / gamma;
+    encode_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
+    encode_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
+    for (size_t i = 0; i < filter_coefficients.size(); i++)
+      encode_params.filter_coefficients[i] = filter_coefficients[i];
+
     D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0);
     D3D::stateman->SetPixelConstants(m_encode_params);
 
@@ -109,7 +121,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w
     // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
     //       complex down filtering to average all pixels and produce the correct result.
     // Also, box filtering won't be correct for anything other than 1x IR
-    if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f)
+    if (scale_by_half || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f)
       D3D::SetLinearCopySampler();
     else
       D3D::SetPointCopySampler();
diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h
index 7c45970d19..4054b07fbf 100644
--- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h
+++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h
@@ -8,6 +8,7 @@
 #include <memory>
 
 #include "Common/CommonTypes.h"
+#include "VideoCommon/TextureCacheBase.h"
 #include "VideoCommon/TextureConversionShader.h"
 #include "VideoCommon/VideoCommon.h"
 
@@ -38,8 +39,9 @@ public:
   void Init();
   void Shutdown();
   void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
-              u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-              bool scale_by_half);
+              u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
+              float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+              const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
 
 private:
   ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params);
diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp
index 85e81f8171..40e02f4e12 100644
--- a/Source/Core/VideoBackends/D3D/Render.cpp
+++ b/Source/Core/VideoBackends/D3D/Render.cpp
@@ -627,8 +627,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
 }
 
 // This function has the final picture. We adjust the aspect ratio here.
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
-                        float Gamma)
+void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
 {
   ResetAPIState();
 
@@ -650,7 +649,7 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region
   auto* xfb_texture = static_cast<DXTexture*>(texture);
 
   BlitScreen(xfb_region, targetRc, xfb_texture->GetRawTexIdentifier(),
-             xfb_texture->GetConfig().width, xfb_texture->GetConfig().height, Gamma);
+             xfb_texture->GetConfig().width, xfb_texture->GetConfig().height);
 
   // Reset viewport for drawing text
   D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.0f, 0.0f, static_cast<float>(m_backbuffer_width),
@@ -854,7 +853,7 @@ void Renderer::BBoxWrite(int index, u16 _value)
 }
 
 void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture,
-                          u32 src_width, u32 src_height, float Gamma)
+                          u32 src_width, u32 src_height)
 {
   if (g_ActiveConfig.stereo_mode == StereoMode::SBS ||
       g_ActiveConfig.stereo_mode == StereoMode::TAB)
@@ -871,13 +870,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D
     D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height,
                            PixelShaderCache::GetColorCopyProgram(false),
                            VertexShaderCache::GetSimpleVertexShader(),
-                           VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0);
+                           VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0);
 
     D3D::context->RSSetViewports(1, &rightVp);
     D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height,
                            PixelShaderCache::GetColorCopyProgram(false),
                            VertexShaderCache::GetSimpleVertexShader(),
-                           VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1);
+                           VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1);
   }
   else if (g_ActiveConfig.stereo_mode == StereoMode::Nvidia3DVision)
   {
@@ -896,13 +895,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D
     D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height,
                            PixelShaderCache::GetColorCopyProgram(false),
                            VertexShaderCache::GetSimpleVertexShader(),
-                           VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0);
+                           VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0);
 
     D3D::context->RSSetViewports(1, &rightVp);
     D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height,
                            PixelShaderCache::GetColorCopyProgram(false),
                            VertexShaderCache::GetSimpleVertexShader(),
-                           VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1);
+                           VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1);
 
     // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should
     // recognize the signature and automatically include the right eye frame.
@@ -927,7 +926,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D
                                            nullptr;
     D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, pixelShader,
                            VertexShaderCache::GetSimpleVertexShader(),
-                           VertexShaderCache::GetSimpleInputLayout(), geomShader, Gamma);
+                           VertexShaderCache::GetSimpleInputLayout(), geomShader, 1.0f);
   }
 }
 
diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h
index a7ccb0b9ae..0927f6c934 100644
--- a/Source/Core/VideoBackends/D3D/Render.h
+++ b/Source/Core/VideoBackends/D3D/Render.h
@@ -63,7 +63,7 @@ public:
 
   TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
 
-  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
+  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;
 
   void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
                    u32 color, u32 z) override;
@@ -84,7 +84,7 @@ private:
   void UpdateBackbufferSize();
 
   void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture,
-                  u32 src_width, u32 src_height, float Gamma);
+                  u32 src_width, u32 src_height);
 
   void UpdateUtilityUniformBuffer(const void* uniforms, u32 uniforms_size);
   void UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices);
diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp
index b0974f3cc3..9c5f139502 100644
--- a/Source/Core/VideoBackends/D3D/TextureCache.cpp
+++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp
@@ -33,10 +33,12 @@ static std::unique_ptr<PSTextureEncoder> g_encoder;
 
 void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
                            u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                           const EFBRectangle& src_rect, bool scale_by_half)
+                           const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients)
 {
   g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
-                    scale_by_half);
+                    scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients);
 }
 
 const char palette_shader[] =
@@ -137,9 +139,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
   D3D::stateman->SetTexture(1, palette_buf_srv);
 
   // TODO: Add support for C14X2 format.  (Different multiplier, more palette entries.)
-  float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
-  D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
-  D3D::stateman->SetPixelConstants(palette_uniform);
+  float params[8] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
+  D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &params, 0, 0);
+  D3D::stateman->SetPixelConstants(uniform_buffer);
 
   const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight());
 
@@ -180,7 +182,7 @@ TextureCache::TextureCache()
 
   palette_buf = nullptr;
   palette_buf_srv = nullptr;
-  palette_uniform = nullptr;
+  uniform_buffer = nullptr;
   palette_pixel_shader[static_cast<int>(TLUTFormat::IA8)] = GetConvertShader("IA8");
   palette_pixel_shader[static_cast<int>(TLUTFormat::RGB565)] = GetConvertShader("RGB565");
   palette_pixel_shader[static_cast<int>(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3");
@@ -195,10 +197,10 @@ TextureCache::TextureCache()
   CHECK(SUCCEEDED(hr), "create palette decoder lut srv");
   D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv");
   const D3D11_BUFFER_DESC cbdesc =
-      CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
-  hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform);
+      CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
+  hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer);
   CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer");
-  D3D::SetDebugObjectName(palette_uniform,
+  D3D::SetDebugObjectName(uniform_buffer,
                           "a constant buffer used in TextureCache::CopyRenderTargetToTexture");
 }
 
@@ -209,7 +211,7 @@ TextureCache::~TextureCache()
 
   SAFE_RELEASE(palette_buf);
   SAFE_RELEASE(palette_buf_srv);
-  SAFE_RELEASE(palette_uniform);
+  SAFE_RELEASE(uniform_buffer);
   for (auto*& shader : palette_pixel_shader)
     SAFE_RELEASE(shader);
   for (auto& iter : m_efb_to_tex_pixel_shaders)
@@ -218,7 +220,9 @@ TextureCache::~TextureCache()
 
 void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                        const EFBRectangle& src_rect, bool scale_by_half,
-                                       EFBCopyFormat dst_format, bool is_intensity)
+                                       EFBCopyFormat dst_format, bool is_intensity, float gamma,
+                                       bool clamp_top, bool clamp_bottom,
+                                       const CopyFilterCoefficientArray& filter_coefficients)
 {
   auto* destination_texture = static_cast<DXTexture*>(entry->texture.get());
 
@@ -260,6 +264,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
   else
     D3D::SetPointCopySampler();
 
+  struct PixelConstants
+  {
+    float filter_coefficients[3];
+    float gamma_rcp;
+    float clamp_top;
+    float clamp_bottom;
+    float pixel_height;
+    u32 padding;
+  };
+  PixelConstants constants;
+  for (size_t i = 0; i < filter_coefficients.size(); i++)
+    constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
+  constants.gamma_rcp = 1.0f / gamma;
+  constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
+  constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
+  constants.pixel_height =
+      g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT;
+  constants.padding = 0;
+  D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0);
+  D3D::stateman->SetPixelConstants(uniform_buffer);
+
   // Make sure we don't draw with the texture set as both a source and target.
   // (This can happen because we don't unbind textures when we free them.)
   D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV());
diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h
index 49332e2de3..24dda22d60 100644
--- a/Source/Core/VideoBackends/D3D/TextureCache.h
+++ b/Source/Core/VideoBackends/D3D/TextureCache.h
@@ -34,11 +34,13 @@ private:
 
   void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-               bool scale_by_half) override;
+               bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+               const CopyFilterCoefficientArray& filter_coefficients) override;
 
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
-                           bool scale_by_half, EFBCopyFormat dst_format,
-                           bool is_intensity) override;
+                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients) override;
 
   bool CompileShaders() override { return true; }
   void DeleteShaders() override {}
@@ -46,7 +48,7 @@ private:
 
   ID3D11Buffer* palette_buf;
   ID3D11ShaderResourceView* palette_buf_srv;
-  ID3D11Buffer* palette_uniform;
+  ID3D11Buffer* uniform_buffer;
   ID3D11PixelShader* palette_pixel_shader[3];
 
   std::map<TextureConversionShaderGen::TCShaderUid, ID3D11PixelShader*> m_efb_to_tex_pixel_shaders;
diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp
index 4e9cf655c2..c9c0e62325 100644
--- a/Source/Core/VideoBackends/Null/Render.cpp
+++ b/Source/Core/VideoBackends/Null/Render.cpp
@@ -92,7 +92,7 @@ TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc)
   return result;
 }
 
-void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64, float)
+void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64)
 {
   UpdateActiveConfig();
 }
diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h
index 0c0092554f..c1bf9c122e 100644
--- a/Source/Core/VideoBackends/Null/Render.h
+++ b/Source/Core/VideoBackends/Null/Render.h
@@ -34,7 +34,7 @@ public:
   void BBoxWrite(int index, u16 value) override {}
   TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
 
-  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
+  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;
 
   void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
                    u32 color, u32 z) override
diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h
index cf9dfa84a2..25803344a4 100644
--- a/Source/Core/VideoBackends/Null/TextureCache.h
+++ b/Source/Core/VideoBackends/Null/TextureCache.h
@@ -27,12 +27,15 @@ public:
 
   void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-               bool scale_by_half) override
+               bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+               const CopyFilterCoefficientArray& filter_coefficients) override
   {
   }
 
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
-                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override
+                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients) override
   {
   }
 };
diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp
index d5259aa651..f049ee9e3a 100644
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@@ -1365,8 +1365,7 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force)
 }
 
 // This function has the final picture. We adjust the aspect ratio here.
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
-                        float Gamma)
+void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
 {
   if (g_ogl_config.bSupportsDebug)
   {
diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h
index d8882e2111..c27c06308c 100644
--- a/Source/Core/VideoBackends/OGL/Render.h
+++ b/Source/Core/VideoBackends/OGL/Render.h
@@ -126,7 +126,7 @@ public:
 
   TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
 
-  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
+  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;
 
   void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
                    u32 color, u32 z) override;
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp
index 24f94e39db..086d0f29f2 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -68,10 +68,18 @@ constexpr const char* geometry_program = "layout(triangles) in;\n"
 
 void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
                            u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                           const EFBRectangle& src_rect, bool scale_by_half)
+                           const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients)
 {
+  // Flip top/bottom due to lower-left coordinate system; unclamped range is [0, 1].
+  float clamp_top_val =
+      clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f;
+  float clamp_bottom_val =
+      clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f;
   TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y,
-                                           memory_stride, src_rect, scale_by_half);
+                                           memory_stride, src_rect, scale_by_half, y_scale, gamma,
+                                           clamp_top_val, clamp_bottom_val, filter_coefficients);
 }
 
 TextureCache::TextureCache()
@@ -483,7 +491,9 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u
 
 void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                        const EFBRectangle& src_rect, bool scale_by_half,
-                                       EFBCopyFormat dst_format, bool is_intensity)
+                                       EFBCopyFormat dst_format, bool is_intensity, float gamma,
+                                       bool clamp_top, bool clamp_bottom,
+                                       const CopyFilterCoefficientArray& filter_coefficients)
 {
   auto* destination_texture = static_cast<OGLTexture*>(entry->texture.get());
   g_renderer->ResetAPIState();  // reset any game specific settings
@@ -528,6 +538,11 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                       code.GetBuffer(), geo_program);
 
     shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position");
+    shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height");
+    shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp");
+    shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb");
+    shader.filter_coefficients_uniform =
+        glGetUniformLocation(shader.shader.glprogid, "filter_coefficients");
   }
 
   shader.shader.Bind();
@@ -535,6 +550,15 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
   TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect);
   glUniform4f(shader.position_uniform, static_cast<float>(R.left), static_cast<float>(R.top),
               static_cast<float>(R.right), static_cast<float>(R.bottom));
+  glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ?
+                                               1.0f / g_renderer->GetTargetHeight() :
+                                               1.0f / EFB_HEIGHT);
+  glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma);
+  glUniform2f(shader.clamp_tb_uniform,
+              clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f,
+              clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f);
+  glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f,
+              filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f);
 
   ProgramShaderCache::BindVertexFormat(nullptr);
   glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h
index 3923919df6..54dc4afef8 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.h
+++ b/Source/Core/VideoBackends/OGL/TextureCache.h
@@ -65,11 +65,13 @@ private:
 
   void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-               bool scale_by_half) override;
+               bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+               const CopyFilterCoefficientArray& filter_coefficients) override;
 
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
-                           bool scale_by_half, EFBCopyFormat dst_format,
-                           bool is_intensity) override;
+                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients) override;
 
   bool CompileShaders() override;
   void DeleteShaders() override;
@@ -84,6 +86,10 @@ private:
   {
     SHADER shader;
     GLuint position_uniform;
+    GLuint pixel_height_uniform;
+    GLuint gamma_rcp_uniform;
+    GLuint clamp_tb_uniform;
+    GLuint filter_coefficients_uniform;
   };
 
   std::map<TextureConversionShaderGen::TCShaderUid, EFBCopyShader> m_efb_copy_programs;
diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
index e1ec911a65..615cc9e1c7 100644
--- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
@@ -39,6 +39,9 @@ struct EncodingProgram
   SHADER program;
   GLint copy_position_uniform;
   GLint y_scale_uniform;
+  GLint gamma_rcp_uniform;
+  GLint clamp_tb_uniform;
+  GLint filter_coefficients_uniform;
 };
 
 std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
@@ -81,6 +84,10 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
 
   program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position");
   program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale");
+  program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp");
+  program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb");
+  program.filter_coefficients_uniform =
+      glGetUniformLocation(program.program.glprogid, "filter_coefficients");
   return s_encoding_programs.emplace(params, program).first->second;
 }
 
@@ -137,7 +144,9 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
 
 void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
                             u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                            const EFBRectangle& src_rect, bool scale_by_half)
+                            const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
+                            float gamma, float clamp_top, float clamp_bottom,
+                            const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
 {
   g_renderer->ResetAPIState();
 
@@ -146,14 +155,18 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
   texconv_shader.program.Bind();
   glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width,
               scale_by_half ? 2 : 1);
-  glUniform1f(texconv_shader.y_scale_uniform, params.y_scale);
+  glUniform1f(texconv_shader.y_scale_uniform, y_scale);
+  glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma);
+  glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom);
+  glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
+              filter_coefficients[1], filter_coefficients[2]);
 
   const GLuint read_texture = params.depth ?
                                   FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
                                   FramebufferManager::ResolveAndGetRenderTarget(src_rect);
 
   EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
-                         scale_by_half && !params.depth, params.y_scale);
+                         scale_by_half && !params.depth, y_scale);
 
   g_renderer->RestoreAPIState();
 }
diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h
index baed715a7e..0d7450e4b6 100644
--- a/Source/Core/VideoBackends/OGL/TextureConverter.h
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.h
@@ -7,10 +7,9 @@
 #include "Common/CommonTypes.h"
 #include "Common/GL/GLUtil.h"
 
+#include "VideoCommon/TextureCacheBase.h"
 #include "VideoCommon/VideoCommon.h"
 
-struct EFBCopyParams;
-
 namespace OGL
 {
 // Converts textures between formats using shaders
@@ -21,9 +20,11 @@ void Init();
 void Shutdown();
 
 // returns size of the encoded data (in bytes)
-void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
-                            u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                            const EFBRectangle& src_rect, bool scale_by_half);
+void EncodeToRamFromTexture(
+    u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
+    u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
+    float y_scale, float gamma, float clamp_top, float clamp_bottom,
+    const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
 }
 
 }  // namespace OGL
diff --git a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp
index 5d852e0ff9..a13feb6c50 100644
--- a/Source/Core/VideoBackends/Software/EfbInterface.cpp
+++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp
@@ -502,6 +502,23 @@ static u32 VerticalFilter(const std::array<u32, 3>& colors,
   return out_color32;
 }
 
+// Raises each of the BLU_C..RED_C channels of a packed 32-bit colour to the power gamma_rcp.
+static u32 GammaCorrection(u32 color, const float gamma_rcp)
+{
+  // Seed with the input so bytes the loop skips are preserved rather than left indeterminate.
+  u8 out_color[4];
+  std::memcpy(out_color, &color, sizeof(out_color));
+  for (int i = BLU_C; i <= RED_C; i++)
+  {
+    out_color[i] = static_cast<u8>(
+        MathUtil::Clamp(std::pow(out_color[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f));
+  }
+
+  u32 out_color32;
+  std::memcpy(&out_color32, out_color, sizeof(out_color32));
+  return out_color32;
+}
+
 // For internal used only, return a non-normalized value, which saves work later.
 static yuv444 ConvertColorToYUV(u32 color)
 {
@@ -530,8 +547,7 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth)
 }
 
 void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
-               bool clamp_top, bool clamp_bottom, float Gamma,
-               const std::array<u8, 7>& filterCoefficients)
+               float gamma)
 {
   if (!xfb_in_ram)
   {
@@ -539,8 +555,12 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec
     return;
   }
 
-  int left = source_rect.left;
-  int right = source_rect.right;
+  const int left = source_rect.left;
+  const int right = source_rect.right;
+  const bool clamp_top = bpmem.triggerEFBCopy.clamp_top;
+  const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom;
+  const float gamma_rcp = 1.0f / gamma;
+  const auto filter_coefficients = bpmem.copyfilter.GetCoefficients();
 
   // this assumes copies will always start on an even (YU) pixel and the
   // copy always has an even width, which might not be true.
@@ -575,9 +595,10 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec
       std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};
 
       // Vertical Filter (Multisampling resolve, deflicker, brightness)
-      u32 filtered = VerticalFilter(colors, filterCoefficients);
+      u32 filtered = VerticalFilter(colors, filter_coefficients);
 
-      // TODO: Gamma correction happens here.
+      // Gamma correction happens here.
+      filtered = GammaCorrection(filtered, gamma_rcp);
 
       scanline[i] = ConvertColorToYUV(filtered);
     }
diff --git a/Source/Core/VideoBackends/Software/EfbInterface.h b/Source/Core/VideoBackends/Software/EfbInterface.h
index 9d0706a83c..7f7c0ec608 100644
--- a/Source/Core/VideoBackends/Software/EfbInterface.h
+++ b/Source/Core/VideoBackends/Software/EfbInterface.h
@@ -59,8 +59,7 @@ u32 GetDepth(u16 x, u16 y);
 u8* GetPixelPointer(u16 x, u16 y, bool depth);
 
 void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
-               bool clamp_top, bool clamp_bottom, float Gamma,
-               const std::array<u8, 7>& filterCoefficients);
+               float gamma);
 
 extern u32 perf_values[PQ_NUM_MEMBERS];
 inline void IncPerfCounterQuadCount(PerfQueryType type)
diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp
index 51b035dd82..f4e322619d 100644
--- a/Source/Core/VideoBackends/Software/SWRenderer.cpp
+++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp
@@ -87,8 +87,7 @@ std::unique_ptr<AbstractPipeline> SWRenderer::CreatePipeline(const AbstractPipel
 }
 
 // Called on the GPU thread
-void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
-                          float Gamma)
+void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
 {
   OSD::DoCallbacks(OSD::CallbackType::OnFrame);
 
diff --git a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h
index 2c4a5aeef6..3c274edda7 100644
--- a/Source/Core/VideoBackends/Software/SWRenderer.h
+++ b/Source/Core/VideoBackends/Software/SWRenderer.h
@@ -34,7 +34,7 @@ public:
 
   TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
 
-  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
+  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;
 
   void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
                    u32 color, u32 z) override;
diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h
index 13a70002f2..f3d9c91938 100644
--- a/Source/Core/VideoBackends/Software/TextureCache.h
+++ b/Source/Core/VideoBackends/Software/TextureCache.h
@@ -18,15 +18,18 @@ public:
   }
   void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-               bool scale_by_half) override
+               bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+               const CopyFilterCoefficientArray& filter_coefficients) override
   {
     TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
-                           src_rect, scale_by_half);
+                           src_rect, scale_by_half, y_scale, gamma);
   }
 
 private:
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
-                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override
+                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients) override
   {
     // TODO: If we ever want to "fake" vram textures, we would need to implement this
   }
diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.cpp b/Source/Core/VideoBackends/Software/TextureEncoder.cpp
index 5308dca705..a31888e544 100644
--- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp
+++ b/Source/Core/VideoBackends/Software/TextureEncoder.cpp
@@ -1469,15 +1469,12 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b
 }
 
 void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
-            u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half)
+            u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
+            float y_scale, float gamma)
 {
   if (params.copy_format == EFBCopyFormat::XFB)
   {
-    static constexpr std::array<float, 4> gamma_LUT = {1.0f, 1.7f, 2.2f, 1.0f};
-    EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale,
-                            !!bpmem.triggerEFBCopy.clamp_top, !!bpmem.triggerEFBCopy.clamp_bottom,
-                            gamma_LUT[bpmem.triggerEFBCopy.gamma],
-                            bpmem.copyfilter.GetCoefficients());
+    EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma);
   }
   else
   {
diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.h b/Source/Core/VideoBackends/Software/TextureEncoder.h
index ec21c97c42..20aa05605a 100644
--- a/Source/Core/VideoBackends/Software/TextureEncoder.h
+++ b/Source/Core/VideoBackends/Software/TextureEncoder.h
@@ -5,12 +5,12 @@
 #pragma once
 
 #include "Common/CommonTypes.h"
+#include "VideoCommon/TextureCacheBase.h"
 #include "VideoCommon/VideoCommon.h"
 
-struct EFBCopyParams;
-
 namespace TextureEncoder
 {
 void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
-            u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
+            u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half,
+            float y_scale, float gamma);
 }
diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp
index c69ad396d6..38d7bd0992 100644
--- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp
+++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp
@@ -697,8 +697,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
   BindEFBToStateTracker();
 }
 
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
-                        float Gamma)
+void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks)
 {
   // Pending/batched EFB pokes should be included in the final image.
   FramebufferManager::GetInstance()->FlushEFBPokes();
diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h
index b2662a3d4b..ce15d12592 100644
--- a/Source/Core/VideoBackends/Vulkan/Renderer.h
+++ b/Source/Core/VideoBackends/Vulkan/Renderer.h
@@ -59,7 +59,7 @@ public:
   void BBoxWrite(int index, u16 value) override;
   TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
 
-  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override;
+  void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override;
 
   void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable,
                    u32 color, u32 z) override;
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
index cb61cd4de4..bc7a0d0f18 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
@@ -100,7 +100,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
 
 void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
                            u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                           const EFBRectangle& src_rect, bool scale_by_half)
+                           const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients)
 {
   // Flush EFB pokes first, as they're expected to be included.
   FramebufferManager::GetInstance()->FlushEFBPokes();
@@ -131,9 +133,9 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt
   src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                   VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
 
-  m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width,
-                                             bytes_per_row, num_blocks_y, memory_stride, src_rect,
-                                             scale_by_half);
+  m_texture_converter->EncodeTextureToMemory(
+      src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
+      src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients);
 
   // Transition back to original state
   src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout);
@@ -209,7 +211,9 @@ void TextureCache::DeleteShaders()
 
 void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                        const EFBRectangle& src_rect, bool scale_by_half,
-                                       EFBCopyFormat dst_format, bool is_intensity)
+                                       EFBCopyFormat dst_format, bool is_intensity, float gamma,
+                                       bool clamp_top, bool clamp_bottom,
+                                       const CopyFilterCoefficientArray& filter_coefficients)
 {
   VKTexture* texture = static_cast<VKTexture*>(entry->texture.get());
 
@@ -228,6 +232,26 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
   VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
   StateTracker::GetInstance()->EndRenderPass();
 
+  // Fill uniform buffer.
+  struct PixelUniforms
+  {
+    float filter_coefficients[3];
+    float gamma_rcp;
+    float clamp_top;
+    float clamp_bottom;
+    float pixel_height;
+    u32 padding;
+  };
+  PixelUniforms uniforms;
+  for (size_t i = 0; i < filter_coefficients.size(); i++)
+    uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
+  uniforms.gamma_rcp = 1.0f / gamma;
+  uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
+  uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
+  uniforms.pixel_height =
+      g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT;
+  uniforms.padding = 0;
+
   // Transition EFB to shader resource before binding.
   // An out-of-bounds source region is valid here, and fine for the draw (since it is converted
   // to texture coordinates), but it's not valid to resolve an out-of-range rectangle.
@@ -274,6 +298,10 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                          g_shader_cache->GetPassthroughVertexShader(),
                          g_shader_cache->GetPassthroughGeometryShader(), shader);
 
+  u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms));
+  std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms));
+  draw.CommitPSUniforms(sizeof(PixelUniforms));
+
   draw.SetPSSampler(0, src_texture->GetView(), src_sampler);
 
   VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}};
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h
index b27f9ad0e7..846761d1d5 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.h
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h
@@ -38,7 +38,8 @@ public:
 
   void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-               bool scale_by_half) override;
+               bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+               const CopyFilterCoefficientArray& filter_coefficients) override;
 
   bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;
 
@@ -52,8 +53,9 @@ public:
 
 private:
   void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
-                           bool scale_by_half, EFBCopyFormat dst_format,
-                           bool is_intensity) override;
+                           bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity,
+                           float gamma, bool clamp_top, bool clamp_bottom,
+                           const CopyFilterCoefficientArray& filter_coefficients) override;
 
   std::unique_ptr<StreamBuffer> m_texture_upload_buffer;
 
diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
index 6c584fcc95..cf5d7075f7 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
@@ -38,6 +38,11 @@ struct EFBEncodeParams
 {
   std::array<s32, 4> position_uniform;
   float y_scale;
+  float gamma_rcp;
+  float clamp_top;
+  float clamp_bottom;
+  s32 filter_coefficients[3];
+  u32 padding;
 };
 }
 TextureConverter::TextureConverter()
@@ -201,10 +206,11 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
   draw.EndRenderPass();
 }
 
-void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr,
-                                             const EFBCopyParams& params, u32 native_width,
-                                             u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
-                                             const EFBRectangle& src_rect, bool scale_by_half)
+void TextureConverter::EncodeTextureToMemory(
+    VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
+    u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
+    bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
+    const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients)
 {
   VkShaderModule shader = GetEncodingShader(params);
   if (shader == VK_NULL_HANDLE)
@@ -236,14 +242,21 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p
   encoder_params.position_uniform[1] = src_rect.top;
   encoder_params.position_uniform[2] = static_cast<s32>(native_width);
   encoder_params.position_uniform[3] = scale_by_half ? 2 : 1;
-  encoder_params.y_scale = params.y_scale;
-  draw.SetPushConstants(&encoder_params, sizeof(encoder_params));
+  encoder_params.y_scale = y_scale;
+  encoder_params.gamma_rcp = 1.0f / gamma;
+  encoder_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
+  encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
+  for (size_t i = 0; i < filter_coefficients.size(); i++)
+    encoder_params.filter_coefficients[i] = filter_coefficients[i];
+  u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams));
+  std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams));
+  draw.CommitPSUniforms(sizeof(EFBEncodeParams));
 
   // We also linear filtering for both box filtering and downsampling higher resolutions to 1x
   // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
   //       complex down filtering to average all pixels and produce the correct result.
   bool linear_filter =
-      (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f;
+      (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f;
   draw.SetPSSampler(0, src_texture,
                     linear_filter ? g_object_cache->GetLinearSampler() :
                                     g_object_cache->GetPointSampler());
diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h
index f277c2d597..f85efc4d5c 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.h
@@ -40,9 +40,12 @@ public:
 
   // Uses an encoding shader to copy src_texture to dest_ptr.
   // NOTE: Executes the current command buffer.
-  void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
-                             u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
-                             u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
+  void
+  EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
+                        u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
+                        const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
+                        float gamma, bool clamp_top, bool clamp_bottom,
+                        const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients);
 
   bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
   void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h
index 4a1cea9d48..0f287b4f99 100644
--- a/Source/Core/VideoCommon/BPMemory.h
+++ b/Source/Core/VideoCommon/BPMemory.h
@@ -970,6 +970,8 @@ union UPE_Copy
 
 union CopyFilterCoefficients
 {
+  using Values = std::array<u8, 7>;
+
   u64 Hex;
 
   BitField<0, 6, u64> w0;
@@ -980,7 +982,7 @@ union CopyFilterCoefficients
   BitField<38, 6, u64> w5;
   BitField<44, 6, u64> w6;
 
-  std::array<u8, 7> GetCoefficients() const
+  Values GetCoefficients() const
   {
     return {
         static_cast<u8>(w0), static_cast<u8>(w1), static_cast<u8>(w2), static_cast<u8>(w3),
diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp
index 143132ecaf..0e0d639a63 100644
--- a/Source/Core/VideoCommon/BPStructs.cpp
+++ b/Source/Core/VideoCommon/BPStructs.cpp
@@ -229,10 +229,13 @@ static void BPWritten(const BPCmd& bp)
     {
       // bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer
       // (Zbuffer uses 24-bit Format)
+      static constexpr CopyFilterCoefficients::Values filter_coefficients = {
+          {0, 0, 21, 22, 21, 0, 0}};
       bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
       g_texture_cache->CopyRenderTargetToTexture(
           destAddr, PE_copy.tp_realFormat(), srcRect.GetWidth(), srcRect.GetHeight(), destStride,
-          is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f);
+          is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f,
+          bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom, filter_coefficients);
     }
     else
     {
@@ -260,9 +263,10 @@ static void BPWritten(const BPCmd& bp)
                 bpmem.copyTexSrcWH.x + 1, destStride, height, yScale);
 
       bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
-      g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(),
-                                                 height, destStride, is_depth_copy, srcRect, false,
-                                                 false, yScale, s_gammaLUT[PE_copy.gamma]);
+      g_texture_cache->CopyRenderTargetToTexture(
+          destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), height, destStride, is_depth_copy,
+          srcRect, false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
+          bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
 
       // This stays in to signal end of a "frame"
       g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
index 1598fe0a00..76d40f5aa3 100644
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@@ -680,7 +680,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
       // TODO: merge more generic parts into VideoCommon
       {
         std::lock_guard<std::mutex> guard(m_swap_mutex);
-        g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks, xfb_entry->gamma);
+        g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks);
       }
 
       // Update the window size based on the frame that was just rendered.
diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h
index e7f7668f22..7e4f4e00f7 100644
--- a/Source/Core/VideoCommon/RenderBase.h
+++ b/Source/Core/VideoCommon/RenderBase.h
@@ -175,8 +175,7 @@ public:
   // Finish up the current frame, print some stats
   void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
             u64 ticks);
-  virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks,
-                        float Gamma = 1.0f) = 0;
+  virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) = 0;
 
   PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; }
   void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; }
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index 9ac75d31f0..f0b1392531 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -1499,10 +1499,39 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
   }
 }
 
-void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width,
-                                                 u32 height, u32 dstStride, bool is_depth_copy,
-                                                 const EFBRectangle& srcRect, bool isIntensity,
-                                                 bool scaleByHalf, float y_scale, float gamma)
+TextureCacheBase::CopyFilterCoefficientArray
+TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
+{
+  // Sum the seven coefficients into three per-row weights here, in common code, so that each
+  // backend does not have to: 0-1 are the row above, 2-4 the current pixel, 5-6 the row below.
+  return {static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
+          static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
+              static_cast<u32>(coefficients[4]),
+          static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
+}
+
+TextureCacheBase::CopyFilterCoefficientArray
+TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
+{
+  // Disabling the copy filter in options only affects the VRAM copy; the RAM copy keeps it.
+  // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
+  CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients);
+  if (!g_ActiveConfig.bDisableCopyFilter)
+    return res;
+
+  // Disabling the copy filter in options should not ignore the values the game sets completely,
+  // as some games use the filter coefficients to control the brightness of the screen. Instead,
+  // move all weight onto the middle sample: the sum is kept, but rows above/below contribute 0.
+  res[1] += res[0] + res[2];
+  res[0] = 0;
+  res[2] = 0;
+  return res;
+}
+
+void TextureCacheBase::CopyRenderTargetToTexture(
+    u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
+    const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
+    bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients)
 {
   // Emulation methods:
   //
@@ -1622,8 +1651,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
   if (copy_to_ram)
   {
     PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
-    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale);
-    CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf);
+    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
+    CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
+            y_scale, gamma, clamp_top, clamp_bottom,
+            GetRAMCopyFilterCoefficients(filter_coefficients));
   }
   else
   {
@@ -1742,8 +1773,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
     {
       entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
       entry->SetDimensions(tex_w, tex_h, 1);
-      entry->gamma = gamma;
-
       entry->frameCount = FRAMECOUNT_INVALID;
       if (is_xfb_copy)
       {
@@ -1757,7 +1786,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
       entry->may_have_overlapping_textures = false;
       entry->is_custom_tex = false;
 
-      CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity);
+      CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity, gamma,
+                          clamp_top, clamp_bottom,
+                          GetVRAMCopyFilterCoefficients(filter_coefficients));
 
       u64 hash = entry->CalculateHash();
       entry->SetHashes(hash, hash);
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h
index 6dce346c24..720e95b470 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -47,23 +47,21 @@ struct TextureAndTLUTFormat
 struct EFBCopyParams
 {
   EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
-                bool yuv_, float y_scale_)
-      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
-        y_scale(y_scale_)
+                bool yuv_)
+      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
   {
   }
 
   bool operator<(const EFBCopyParams& rhs) const
   {
-    return std::tie(efb_format, copy_format, depth, yuv, y_scale) <
-           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale);
+    return std::tie(efb_format, copy_format, depth, yuv) <
+           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
   }
 
   PEControl::PixelFormat efb_format;
   EFBCopyFormat copy_format;
   bool depth;
   bool yuv;
-  float y_scale;
 };
 
 struct TextureLookupInformation
@@ -108,6 +106,8 @@ private:
   static const int FRAMECOUNT_INVALID = 0;
 
 public:
+  using CopyFilterCoefficientArray = std::array<u32, 3>;
+
   struct TCacheEntry
   {
     // common members
@@ -126,7 +126,6 @@ public:
                                       // content, aren't just downscaled
     bool should_force_safe_hashing = false;  // for XFB
     bool is_xfb_copy = false;
-    float gamma = 1.0f;
     u64 id;
 
     bool reference_changed = false;  // used by xfb to determine when a reference xfb changed
@@ -216,7 +215,9 @@ public:
 
   virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
                        u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
-                       bool scale_by_half) = 0;
+                       bool scale_by_half, float y_scale, float gamma, bool clamp_top,
+                       bool clamp_bottom,
+                       const CopyFilterCoefficientArray& filter_coefficients) = 0;
 
   virtual bool CompileShaders() = 0;
   virtual void DeleteShaders() = 0;
@@ -248,7 +249,9 @@ public:
   virtual void BindTextures();
   void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,
                                  u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect,
-                                 bool isIntensity, bool scaleByHalf, float y_scale, float gamma);
+                                 bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
+                                 bool clamp_top, bool clamp_bottom,
+                                 const CopyFilterCoefficients::Values& filter_coefficients);
 
   virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
                               TLUTFormat format) = 0;
@@ -315,13 +318,21 @@ private:
 
   virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                    const EFBRectangle& src_rect, bool scale_by_half,
-                                   EFBCopyFormat dst_format, bool is_intensity) = 0;
+                                   EFBCopyFormat dst_format, bool is_intensity, float gamma,
+                                   bool clamp_top, bool clamp_bottom,
+                                   const CopyFilterCoefficientArray& filter_coefficients) = 0;
 
   // Removes and unlinks texture from texture cache and returns it to the pool
   TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);
 
   void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
 
+  // Precomputing the coefficients for the previous, current, and next lines for the copy filter.
+  CopyFilterCoefficientArray
+  GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
+  CopyFilterCoefficientArray
+  GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
+
   TexAddrCache textures_by_address;
   TexHashCache textures_by_hash;
   TexPool texture_pool;
diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 4d1539572c..1912f2aee5 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -57,19 +57,44 @@ u16 GetEncodedSampleCount(EFBCopyFormat format)
   }
 }
 
-// block dimensions : widthStride, heightStride
-// texture dims : width, height, x offset, y offset
-static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
+static void WriteHeader(char*& p, APIType ApiType)
 {
-  // left, top, of source rectangle within source texture
-  // width of the destination rectangle, scale_factor (1 or 2)
-  if (ApiType == APIType::Vulkan)
-    WRITE(p,
-          "layout(std140, push_constant) uniform PCBlock { int4 position; float y_scale; } PC;\n");
-  else
+  if (ApiType == APIType::OpenGL)
   {
+    // left, top, of source rectangle within source texture
+    // width of the destination rectangle, scale_factor (1 or 2)
     WRITE(p, "uniform int4 position;\n");
     WRITE(p, "uniform float y_scale;\n");
+    WRITE(p, "uniform float gamma_rcp;\n");
+    WRITE(p, "uniform float2 clamp_tb;\n");
+    WRITE(p, "uniform int3 filter_coefficients;\n");
+    WRITE(p, "#define samp0 samp9\n");
+    WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
+    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
+  }
+  else if (ApiType == APIType::Vulkan)
+  {
+    WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n");
+    WRITE(p, "  int4 position;\n");
+    WRITE(p, "  float y_scale;\n");
+    WRITE(p, "  float gamma_rcp;\n");
+    WRITE(p, "  float2 clamp_tb;\n");
+    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "};\n");
+    WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
+    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
+  }
+  else  // D3D
+  {
+    WRITE(p, "cbuffer PSBlock : register(b0) {\n");
+    WRITE(p, "  int4 position;\n");
+    WRITE(p, "  float y_scale;\n");
+    WRITE(p, "  float gamma_rcp;\n");
+    WRITE(p, "  float2 clamp_tb;\n");
+    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "};\n");
+    WRITE(p, "sampler samp0 : register(s0);\n");
+    WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
   }
 
   // D3D does not have roundEven(), only round(), which is specified "to the nearest integer".
@@ -96,39 +121,100 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
   WRITE(p, "  val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n");
   WRITE(p, "  return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n");
   WRITE(p, "}\n");
+}
 
-  int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
-  int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
-  int samples = GetEncodedSampleCount(format);
+static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType ApiType)
+{
+  auto WriteSampleOp = [&](int yoffset) {
+    if (!params.depth)
+    {
+      switch (params.efb_format)
+      {
+      case PEControl::RGB8_Z24:
+        WRITE(p, "RGBA8ToRGB8(");
+        break;
+      case PEControl::RGBA6_Z24:
+        WRITE(p, "RGBA8ToRGBA6(");
+        break;
+      case PEControl::RGB565_Z16:
+        WRITE(p, "RGBA8ToRGB565(");
+        break;
+      default:
+        WRITE(p, "(");
+        break;
+      }
+    }
+    else
+    {
+      // Handle depth inversion (applies to both D3D and Vulkan).
+      if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
+        WRITE(p, "1.0 - (");
+      else
+        WRITE(p, "(");
+    }
 
-  if (ApiType == APIType::OpenGL)
+    if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
+      WRITE(p, "texture(samp0, float3(");
+    else
+      WRITE(p, "Tex0.Sample(samp0, float3(");
+
+    WRITE(p, "uv.x + float(xoffset) * pixel_size.x, ");  // GLSL ES: no implicit int->float
+
+    // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row.
+    if (yoffset != 0)
+    {
+      if (ApiType == APIType::OpenGL)
+        WRITE(p, "clamp(uv.y - float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
+      else
+        WRITE(p, "clamp(uv.y + float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
+    }
+    else
+    {
+      WRITE(p, "uv.y");
+    }
+
+    WRITE(p, ", 0.0)))");
+  };
+
+  // The copy filter applies to both color and depth copies. This has been verified on hardware.
+  // The filter is only applied to the RGB channels, the alpha channel is left intact.
+  WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
+  WRITE(p, "{\n");
+  WRITE(p, "  float4 prev_row = ");
+  WriteSampleOp(-1);
+  WRITE(p, ";\n");
+  WRITE(p, "  float4 current_row = ");
+  WriteSampleOp(0);
+  WRITE(p, ";\n");
+  WRITE(p, "  float4 next_row = ");
+  WriteSampleOp(1);
+  WRITE(p, ";\n");
+  WRITE(p,
+        "  float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
+        "                             int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
+        "                             int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
+        "                            int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
+  WRITE(p, "  return float4(col, current_row.a);\n");
+  WRITE(p, "}\n");
+}
+
+// block dimensions : widthStride, heightStride
+// texture dims : width, height, x offset, y offset
+static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat format,
+                          APIType ApiType)
+{
+  WriteHeader(p, ApiType);
+  WriteSampleFunction(p, params, ApiType);
+
+  if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
   {
-    WRITE(p, "#define samp0 samp9\n");
-    WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
-
-    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
     WRITE(p, "void main()\n");
     WRITE(p, "{\n"
              "  int2 sampleUv;\n"
              "  int2 uv1 = int2(gl_FragCoord.xy);\n");
   }
-  else if (ApiType == APIType::Vulkan)
-  {
-    WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
-    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
-
-    WRITE(p, "void main()\n");
-    WRITE(p, "{\n"
-             "  int2 sampleUv;\n"
-             "  int2 uv1 = int2(gl_FragCoord.xy);\n"
-             "  int4 position = PC.position;\n"
-             "  float y_scale = PC.y_scale;\n");
-  }
   else  // D3D
   {
-    WRITE(p, "sampler samp0 : register(s0);\n");
-    WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
-
     WRITE(p, "void main(\n");
     WRITE(p, "  out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n");
     WRITE(p, "{\n"
@@ -136,6 +222,10 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
              "  int2 uv1 = int2(rawpos.xy);\n");
   }
 
+  int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
+  int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
+  int samples = GetEncodedSampleCount(format);
+
   WRITE(p, "  int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples),
         IntLog2(blkW));
   WRITE(p, "  int y_block_position = uv1.y << %d;\n", IntLog2(blkH));
@@ -167,51 +257,13 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
     WRITE(p, "  uv0.y = 1.0-uv0.y;\n");
   }
 
-  WRITE(p, "  float sample_offset = float(position.w) / float(%d);\n", EFB_WIDTH);
+  WRITE(p, "  float2 pixel_size = float2(position.ww) / float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT);
 }
 
 static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset,
                              APIType ApiType, const EFBCopyParams& params)
 {
-  WRITE(p, "  %s = ", dest);
-
-  if (!params.depth)
-  {
-    switch (params.efb_format)
-    {
-    case PEControl::RGB8_Z24:
-      WRITE(p, "RGBA8ToRGB8(");
-      break;
-    case PEControl::RGBA6_Z24:
-      WRITE(p, "RGBA8ToRGBA6(");
-      break;
-    case PEControl::RGB565_Z16:
-      WRITE(p, "RGBA8ToRGB565(");
-      break;
-    default:
-      WRITE(p, "(");
-      break;
-    }
-  }
-  else
-  {
-    // Handle D3D depth inversion.
-    if (ApiType == APIType::D3D || ApiType == APIType::Vulkan)
-      WRITE(p, "1.0 - (");
-    else
-      WRITE(p, "(");
-  }
-
-  if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
-  {
-    WRITE(p, "texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset,
-          colorComp);
-  }
-  else
-  {
-    WRITE(p, "Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset,
-          colorComp);
-  }
+  WRITE(p, "  %s = SampleEFB(uv0, pixel_size, %d).%s;\n", dest, xoffset, colorComp);
 }
 
 static void WriteColorToIntensity(char*& p, const char* src, const char* dest)
@@ -239,7 +291,7 @@ static void WriteEncoderEnd(char*& p)
 
 static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::R8, ApiType);
   WRITE(p, "  float3 texSample;\n");
 
   WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params);
@@ -261,7 +313,7 @@ static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& param
 
 static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType);
   WRITE(p, "  float3 texSample;\n");
   WRITE(p, "  float4 color0;\n");
   WRITE(p, "  float4 color1;\n");
@@ -302,7 +354,7 @@ static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& param
 
 static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType);
   WRITE(p, "  float4 texSample;\n");
 
   WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
@@ -320,7 +372,7 @@ static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& para
 
 static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType);
   WRITE(p, "  float4 texSample;\n");
   WRITE(p, "  float4 color0;\n");
   WRITE(p, "  float4 color1;\n");
@@ -352,7 +404,7 @@ static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& para
 
 static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RGB565, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RGB565, ApiType);
   WRITE(p, "  float3 texSample0;\n");
   WRITE(p, "  float3 texSample1;\n");
 
@@ -377,7 +429,7 @@ static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& p
 
 static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RGB5A3, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RGB5A3, ApiType);
 
   WRITE(p, "  float4 texSample;\n");
   WRITE(p, "  float color0;\n");
@@ -441,7 +493,7 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& p
 
 static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType);
 
   WRITE(p, "  float4 texSample;\n");
   WRITE(p, "  float4 color0;\n");
@@ -466,7 +518,7 @@ static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& pa
 
 static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType);
   WRITE(p, "  float4 color0;\n");
   WRITE(p, "  float4 color1;\n");
 
@@ -488,7 +540,7 @@ static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EF
 
 static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::R8, ApiType);
 
   WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, params);
   WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, params);
@@ -501,7 +553,7 @@ static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EF
 static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
                             const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType);
   WRITE(p, "  float2 texSample;\n");
   WRITE(p, "  float4 color0;\n");
   WRITE(p, "  float4 color1;\n");
@@ -532,7 +584,7 @@ static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
 static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType,
                             const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType);
 
   WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, params);
   WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, params);
@@ -543,7 +595,7 @@ static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType,
 static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType,
                            const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::G8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::G8, ApiType);
 
   WRITE(p, " float depth;\n");
 
@@ -564,7 +616,7 @@ static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType,
 
 static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType);
 
   WRITE(p, "  float depth;\n");
   WRITE(p, "  float3 expanded;\n");
@@ -596,7 +648,7 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& para
 
 static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::GB8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::GB8, ApiType);
 
   WRITE(p, "  float depth;\n");
   WRITE(p, "  float3 expanded;\n");
@@ -632,7 +684,7 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& par
 
 static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
+  WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType);
 
   WRITE(p, "  float depth0;\n");
   WRITE(p, "  float depth1;\n");
@@ -672,18 +724,21 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& para
 
 static void WriteXFBEncoder(char*& p, APIType ApiType, const EFBCopyParams& params)
 {
-  WriteSwizzler(p, EFBCopyFormat::XFB, ApiType);
-
-  WRITE(p, "  float3 y_const = float3(0.257, 0.504, 0.098);\n");
-  WRITE(p, "  float3 u_const = float3(-0.148, -0.291, 0.439);\n");
-  WRITE(p, "  float3 v_const = float3(0.439, -0.368, -0.071);\n");
-  WRITE(p, "  float3 color0;\n");
-  WRITE(p, "  float3 color1;\n");
+  WriteSwizzler(p, params, EFBCopyFormat::XFB, ApiType);
 
+  WRITE(p, "  float3 color0, color1;\n");
   WriteSampleColor(p, "rgb", "color0", 0, ApiType, params);
   WriteSampleColor(p, "rgb", "color1", 1, ApiType, params);
-  WRITE(p, "  float3 average = (color0 + color1) * 0.5;\n");
 
+  // Gamma is only applied to XFB copies. Scalar swizzles (.xxx) need GLSL 4.20, so avoid them.
+  WRITE(p, "  color0 = pow(color0, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
+  WRITE(p, "  color1 = pow(color1, float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
+
+  // Convert to YUV.
+  WRITE(p, "  const float3 y_const = float3(0.257, 0.504, 0.098);\n");
+  WRITE(p, "  const float3 u_const = float3(-0.148, -0.291, 0.439);\n");
+  WRITE(p, "  const float3 v_const = float3(0.439, -0.368, -0.071);\n");
+  WRITE(p, "  float3 average = (color0 + color1) * 0.5;\n");
   WRITE(p, "  ocol0.b = dot(color0,  y_const) + 0.0625;\n");
   WRITE(p, "  ocol0.g = dot(average, u_const) + 0.5;\n");
   WRITE(p, "  ocol0.r = dot(color1,  y_const) + 0.0625;\n");
diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
index 74ab4b38d2..dce823ba5a 100644
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
@@ -38,34 +38,66 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
   if (api_type == APIType::OpenGL)
   {
     out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
-              "#define samp0 samp9\n"
-              "#define uv0 f_uv0\n"
+              "uniform float3 filter_coefficients;\n"
+              "uniform float gamma_rcp;\n"
+              "uniform float2 clamp_tb;\n"
+              "uniform float pixel_height;\n");
+    out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
+              "  return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), "
+              "clamp_tb.x, clamp_tb.y), %s));\n"
+              "}\n",
+              mono_depth ? "0.0" : "uv.z");
+    out.Write("#define uv0 f_uv0\n"
               "in vec3 uv0;\n"
               "out vec4 ocol0;\n"
-              "void main(){\n"
-              "  vec4 texcol = texture(samp0, %s);\n",
-              mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
+              "void main(){\n");
   }
   else if (api_type == APIType::Vulkan)
   {
-    out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
-              "layout(location = 0) in vec3 uv0;\n"
+    out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n"
+              "  float3 filter_coefficients;\n"
+              "  float gamma_rcp;\n"
+              "  float2 clamp_tb;\n"
+              "  float pixel_height;\n"
+              "};\n");
+    out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
+    out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
+              "  return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
+              "clamp_tb.x, clamp_tb.y), %s));\n"
+              "}\n",
+              mono_depth ? "0.0" : "uv.z");
+    out.Write("layout(location = 0) in vec3 uv0;\n"
               "layout(location = 1) in vec4 col0;\n"
               "layout(location = 0) out vec4 ocol0;"
-              "void main(){\n"
-              "  vec4 texcol = texture(samp0, %s);\n",
-              mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
+              "void main(){\n");
   }
   else if (api_type == APIType::D3D)
   {
     out.Write("Texture2DArray tex0 : register(t0);\n"
               "SamplerState samp0 : register(s0);\n"
-              "void main(out float4 ocol0 : SV_Target,\n"
+              "uniform float3 filter_coefficients;\n"
+              "uniform float gamma_rcp;\n"
+              "uniform float2 clamp_tb;\n"
+              "uniform float pixel_height;\n\n");
+    out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n"
+              "  return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
+              "clamp_tb.x, clamp_tb.y), %s));\n"
+              "}\n",
+              mono_depth ? "0.0" : "uv.z");
+    out.Write("void main(out float4 ocol0 : SV_Target,\n"
               "          in float4 pos : SV_Position,\n"
-              "          in float3 uv0 : TEXCOORD0) {\n"
-              "  float4 texcol = tex0.Sample(samp0, uv0);\n");
+              "          in float3 uv0 : TEXCOORD0) {\n");
   }
 
+  // The copy filter applies to both color and depth copies. This has been verified on hardware.
+  // The filter is only applied to the RGB channels, the alpha channel is left intact.
+  out.Write("  float4 prev_row = SampleEFB(uv0, -1.0f);\n"
+            "  float4 current_row = SampleEFB(uv0, 0.0f);\n"
+            "  float4 next_row = SampleEFB(uv0, 1.0f);\n"
+            "  float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
+            "                         current_row.rgb * filter_coefficients[1] +\n"
+            "                         next_row.rgb * filter_coefficients[2], current_row.a);\n");
+
   if (uid_data->is_depth_copy)
   {
     if (api_type == APIType::D3D || api_type == APIType::Vulkan)
@@ -223,8 +255,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
       out.Write("  ocol0 = texcol;\n");
       break;
 
-    case EFBCopyFormat::XFB:  // XFB copy, we just pretend it's an RGBX copy
-      out.Write("  ocol0 = float4(texcol.rgb, 1.0);\n");
+    case EFBCopyFormat::XFB:
+      out.Write("  ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), texcol.a);\n");
       break;
 
     default:
diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp
index d08d5b793a..abdf27dcd1 100644
--- a/Source/Core/VideoCommon/VideoConfig.cpp
+++ b/Source/Core/VideoCommon/VideoConfig.cpp
@@ -120,6 +120,7 @@ void VideoConfig::Refresh()
   iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
   sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER);
   bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR);
+  bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER);
 
   stereo_mode = static_cast<StereoMode>(Config::Get(Config::GFX_STEREO_MODE));
   iStereoDepth = Config::Get(Config::GFX_STEREO_DEPTH);
diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h
index cc8e4cde88..03ccfca3ba 100644
--- a/Source/Core/VideoCommon/VideoConfig.h
+++ b/Source/Core/VideoCommon/VideoConfig.h
@@ -73,6 +73,7 @@ struct VideoConfig final
   int iMaxAnisotropy;
   std::string sPostProcessingShader;
   bool bForceTrueColor;
+  bool bDisableCopyFilter;
 
   // Information
   bool bShowFPS;