From b285188de1a38565b9718bdac7fe698ee870d914 Mon Sep 17 00:00:00 2001
From: iwubcode <iwubcode@users.noreply.github.com>
Date: Sun, 2 Jul 2017 21:24:20 -0500
Subject: [PATCH] Video Backends: Implement vertical scaling for XFB copies.
 This fixes the display of PAL games that run in 50 Hz mode.

---
 .../VideoBackends/D3D/PSTextureEncoder.cpp    |  7 ++++--
 .../VideoBackends/OGL/TextureConverter.cpp    | 11 ++++++----
 .../VideoBackends/Vulkan/TextureConverter.cpp | 22 +++++++++++++++----
 Source/Core/VideoCommon/BPStructs.cpp         |  4 ++--
 Source/Core/VideoCommon/RenderBase.cpp        | 11 +++++-----
 Source/Core/VideoCommon/TextureCacheBase.cpp  | 20 +++++++++--------
 Source/Core/VideoCommon/TextureCacheBase.h    | 15 ++++++++-----
 .../VideoCommon/TextureConversionShader.cpp   |  9 ++++++--
 8 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
index 7c05215948..2aafb299bf 100644
--- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
+++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
@@ -26,6 +26,8 @@ struct EFBEncodeParams
   s32 SrcTop;
   u32 DestWidth;
   u32 ScaleFactor;
+  float y_scale;
+  u32 padding[3];
 };
 
 PSTextureEncoder::PSTextureEncoder()
@@ -45,7 +47,7 @@ void PSTextureEncoder::Init()
   //       EFB2RAM copies use max (EFB_WIDTH * 4) by (EFB_HEIGHT / 4)
   //       XFB2RAM copies use max (EFB_WIDTH / 2) by (EFB_HEIGHT)
   D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_B8G8R8A8_UNORM, EFB_WIDTH * 4,
-                                                    EFB_HEIGHT, 1, 1, D3D11_BIND_RENDER_TARGET);
+                                                    1024, 1, 1, D3D11_BIND_RENDER_TARGET);
   hr = D3D::device->CreateTexture2D(&t2dd, nullptr, &m_out);
   CHECK(SUCCEEDED(hr), "create efb encode output texture");
   D3D::SetDebugObjectName(m_out, "efb encoder output texture");
@@ -127,6 +129,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w
     encode_params.SrcTop = src_rect.top;
     encode_params.DestWidth = native_width;
     encode_params.ScaleFactor = scale_by_half ? 2 : 1;
+    encode_params.y_scale = params.y_scale;
     D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &encode_params, 0, 0);
     D3D::stateman->SetPixelConstants(m_encodeParams);
 
@@ -134,7 +137,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w
     // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
     //       complex down filtering to average all pixels and produce the correct result.
     // Also, box filtering won't be correct for anything other than 1x IR
-    if (scale_by_half || g_renderer->GetEFBScale() != 1)
+    if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f)
       D3D::SetLinearCopySampler();
     else
       D3D::SetPointCopySampler();
diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
index 67f1bcc28e..1fb55c8d24 100644
--- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
@@ -50,6 +50,7 @@ struct EncodingProgram
 {
   SHADER program;
   GLint copy_position_uniform;
+  GLint y_scale_uniform;
 };
 static std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
 
@@ -166,6 +167,7 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
     PanicAlert("Failed to compile texture encoding shader.");
 
   program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position");
+  program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale");
   return s_encoding_programs.emplace(params, program).first->second;
 }
 
@@ -217,7 +219,7 @@ void Shutdown()
 // dst_line_size, writeStride in bytes
 
 static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line_size,
-                                   u32 dstHeight, u32 writeStride, bool linearFilter)
+                                   u32 dstHeight, u32 writeStride, bool linearFilter, float y_scale)
 {
   // switch to texture converter frame buffer
   // attach render buffer as color destination
@@ -233,7 +235,7 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
   // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
   //       complex down filtering to average all pixels and produce the correct result.
   // Also, box filtering won't be correct for anything other than 1x IR
-  if (linearFilter || g_renderer->GetEFBScale() != 1)
+  if (linearFilter || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f)
     g_sampler_cache->BindLinearSampler(9);
   else
     g_sampler_cache->BindNearestSampler(9);
@@ -282,13 +284,14 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
   texconv_shader.program.Bind();
   glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width,
               scale_by_half ? 2 : 1);
+  glUniform1f(texconv_shader.y_scale_uniform, params.y_scale);
 
   const GLuint read_texture = params.depth ?
                                   FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
                                   FramebufferManager::ResolveAndGetRenderTarget(src_rect);
 
   EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
-                         scale_by_half && !params.depth);
+                         scale_by_half && !params.depth, params.y_scale);
 
   FramebufferManager::SetFramebuffer(0);
   g_renderer->RestoreAPIState();
@@ -308,7 +311,7 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des
   // We enable linear filtering, because the GameCube does filtering in the vertical direction when
   // yscale is enabled.
   // Otherwise we get jaggies when a game uses yscaling (most PAL games)
-  EncodeToRamUsingShader(srcTexture, destAddr, dstWidth * 2, dstHeight, dstStride, true);
+  EncodeToRamUsingShader(srcTexture, destAddr, dstWidth * 2, dstHeight, dstStride, true, 1.0f);
   FramebufferManager::SetFramebuffer(0);
   OGLTexture::DisableStage(0);
   g_renderer->RestoreAPIState();
diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
index e4d0e167f9..75f274f917 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
@@ -5,6 +5,7 @@
 #include "VideoBackends/Vulkan/TextureConverter.h"
 
 #include <algorithm>
+#include <array>
 #include <cstddef>
 #include <cstring>
 #include <string>
@@ -32,6 +33,14 @@
 
 namespace Vulkan
 {
+namespace
+{
+struct EFBEncodeParams  // Mirrors the encoding shader's PCBlock push constants.
+{
+  std::array<s32, 4> position_uniform;  // left, top, native_width, scale (1 or 2)
+  float y_scale;                        // vertical scale factor of the copy
+};
+}  // Anonymous namespace
 TextureConverter::TextureConverter()
 {
 }
@@ -243,14 +252,19 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p
                          VK_NULL_HANDLE, shader);
 
   // Uniform - int4 of left,top,native_width,scale
-  s32 position_uniform[4] = {src_rect.left, src_rect.top, static_cast<s32>(native_width),
-                             scale_by_half ? 2 : 1};
-  draw.SetPushConstants(position_uniform, sizeof(position_uniform));
+  EFBEncodeParams encoder_params;
+  encoder_params.position_uniform[0] = src_rect.left;
+  encoder_params.position_uniform[1] = src_rect.top;
+  encoder_params.position_uniform[2] = static_cast<s32>(native_width);
+  encoder_params.position_uniform[3] = scale_by_half ? 2 : 1;
+  encoder_params.y_scale = params.y_scale;
+  draw.SetPushConstants(&encoder_params, sizeof(encoder_params));
 
   // We also linear filtering for both box filtering and downsampling higher resolutions to 1x
   // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more
   //       complex down filtering to average all pixels and produce the correct result.
-  bool linear_filter = (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1;
+  bool linear_filter = (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 ||
+                       params.y_scale > 1.0f;
   draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() :
                                                     g_object_cache->GetPointSampler());
 
diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp
index e721831481..183f7d4f20 100644
--- a/Source/Core/VideoCommon/BPStructs.cpp
+++ b/Source/Core/VideoCommon/BPStructs.cpp
@@ -232,7 +232,7 @@ static void BPWritten(const BPCmd& bp)
       bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
       g_texture_cache->CopyRenderTargetToTexture(destAddr, PE_copy.tp_realFormat(), destStride,
                                                  is_depth_copy, srcRect, !!PE_copy.intensity_fmt,
-                                                 !!PE_copy.half_scale);
+                                                 !!PE_copy.half_scale, 1.0f);
     }
     else
     {
@@ -261,7 +261,7 @@ static void BPWritten(const BPCmd& bp)
       bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24;
       g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, destStride,
                                                  is_depth_copy, srcRect, false,
-                                                 false);
+                                                 false, yScale);
 
       // This stays in to signal end of a "frame"
       g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
index 28e607f8cb..168aece684 100644
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@@ -668,15 +668,16 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
     // Get the current XFB from texture cache
     auto* xfb_entry = g_texture_cache->GetTexture(xfbAddr, fbWidth, fbHeight, TextureFormat::XFB,
                                                   force_safe_texture_cache_hash);
-    
-    // TODO, check if xfb_entry is a duplicate of the previous frame and skip SwapImpl
 
-	m_previous_xfb_texture = xfb_entry->texture.get();
+    if (xfb_entry)
+    {
+      // TODO, check if xfb_entry is a duplicate of the previous frame and skip SwapImpl
 
       m_last_xfb_texture = xfb_entry->texture.get();
 
-    // TODO: merge more generic parts into VideoCommon
-    g_renderer->SwapImpl(xfb_entry->texture.get(), rc, ticks, Gamma);
+      // TODO: merge more generic parts into VideoCommon
+      g_renderer->SwapImpl(xfb_entry->texture.get(), rc, ticks, Gamma);
+    }
   }
 
   if (m_xfb_written)
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index ed5a071177..beb5b584d8 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -376,7 +376,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
         u32 copy_width =
             std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x);
         u32 copy_height =
-            std::min(entry->native_height - src_y, entry_to_update->native_height - dst_y);
+            std::min((entry->native_height * entry->y_scale) - src_y, (entry_to_update->native_height * entry_to_update->y_scale) - dst_y);
 
         // If one of the textures is scaled, scale both with the current efb scaling factor
         if (entry_to_update->native_width != entry_to_update->GetWidth() ||
@@ -385,9 +385,9 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
         {
           ScaleTextureCacheEntryTo(entry_to_update,
                                    g_renderer->EFBToScaledX(entry_to_update->native_width),
-                                   g_renderer->EFBToScaledY(entry_to_update->native_height));
+                                   g_renderer->EFBToScaledY(entry_to_update->native_height * entry_to_update->y_scale));
           ScaleTextureCacheEntryTo(entry, g_renderer->EFBToScaledX(entry->native_width),
-                                   g_renderer->EFBToScaledY(entry->native_height));
+                                   g_renderer->EFBToScaledY(entry->native_height * entry->y_scale));
 
           src_x = g_renderer->EFBToScaledX(src_x);
           src_y = g_renderer->EFBToScaledY(src_y);
@@ -794,7 +794,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid
 
     // Do not load strided EFB copies, they are not meant to be used directly.
     // Also do not directly load EFB copies, which were partly overwritten.
-    if (entry->IsCopy() && entry->native_width == nativeW && entry->native_height == nativeH &&
+    if (entry->IsCopy() && entry->native_width == nativeW && static_cast<unsigned int>(entry->native_height * entry->y_scale) == nativeH &&
         entry->memory_stride == entry->BytesPerRow() && !entry->may_have_overlapping_textures)
     {
       // EFB copies have slightly different rules as EFB copy formats have different
@@ -881,7 +881,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid
       TCacheEntry* entry = hash_iter->second;
       // All parameters, except the address, need to match here
       if (entry->format == full_format && entry->native_levels >= tex_levels &&
-          entry->native_width == nativeW && entry->native_height == nativeH)
+          entry->native_width == nativeW &&
+          static_cast<unsigned int>(entry->native_height * entry->y_scale) == nativeH)
       {
         entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt);
 
@@ -1107,7 +1108,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid
 void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat,
                                                  u32 dstStride, bool is_depth_copy,
                                                  const EFBRectangle& srcRect, bool isIntensity,
-                                                 bool scaleByHalf)
+                                                 bool scaleByHalf, float y_scale)
 {
   // Emulation methods:
   //
@@ -1451,7 +1452,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
   const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);
 
   // Round up source height to multiple of block size
-  u32 actualHeight = Common::AlignUp(tex_h, blockH);
+  u32 actualHeight = Common::AlignUp(static_cast<unsigned int>(tex_h * y_scale), blockH);
   const u32 actualWidth = Common::AlignUp(tex_w, blockW);
 
   u32 num_blocks_y = actualHeight / blockH;
@@ -1465,7 +1466,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
 
   if (copy_to_ram)
   {
-    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
+    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale);
     CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf);
   }
   else
@@ -1556,6 +1557,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
     {
       entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy);
       entry->SetDimensions(tex_w, tex_h, 1);
+      entry->y_scale = y_scale;
 
       entry->frameCount = FRAMECOUNT_INVALID;
       if (is_xfb_copy)
@@ -1731,7 +1733,7 @@ u32 TextureCacheBase::TCacheEntry::NumBlocksY() const
 {
   u32 blockH = TexDecoder_GetBlockHeightInTexels(format.texfmt);
   // Round up source height to multiple of block size
-  u32 actualHeight = Common::AlignUp(native_height, blockH);
+  u32 actualHeight = Common::AlignUp(static_cast<unsigned int>(native_height * y_scale), blockH);
 
   return actualHeight / blockH;
 }
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h
index 53ab41a836..96b36b6eba 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -45,21 +45,22 @@ struct TextureAndTLUTFormat
 struct EFBCopyParams
 {
   EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
-                bool yuv_)
-      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
+                bool yuv_, float y_scale_)
+      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), y_scale(y_scale_)
   {
   }
 
   bool operator<(const EFBCopyParams& rhs) const
   {
-    return std::tie(efb_format, copy_format, depth, yuv) <
-           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
+    return std::tie(efb_format, copy_format, depth, yuv, y_scale) <
+           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale);
   }
 
   PEControl::PixelFormat efb_format;
   EFBCopyFormat copy_format;
   bool depth;
   bool yuv;
+  float y_scale;  // Vertical scale factor for the copy; participates in operator< ordering.
 };
 
 class TextureCacheBase
@@ -86,6 +87,7 @@ public:
                                       // content, aren't just downscaled
     bool should_force_safe_hashing = false;  // for XFB
     bool is_xfb_copy = false;
+    float y_scale = 1.0f;
 
     unsigned int native_width,
         native_height;  // Texture dimensions from the GameCube's point of view
@@ -188,7 +190,7 @@ public:
   virtual void BindTextures();
   void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride,
                                  bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity,
-                                 bool scaleByHalf);
+                                 bool scaleByHalf, float y_scale);
 
   virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
                               TLUTFormat format) = 0;
@@ -210,6 +212,8 @@ public:
   {
   }
 
+  void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
+
 protected:
   TextureCacheBase();
 
@@ -235,7 +239,6 @@ private:
 
   TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
 
-  void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
   TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
                                        TLUTFormat tlutfmt);
 
diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 74e50cd27f..04b7bb3a01 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -64,9 +64,12 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
   // left, top, of source rectangle within source texture
   // width of the destination rectangle, scale_factor (1 or 2)
   if (ApiType == APIType::Vulkan)
-    WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; } PC;\n");
+    WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; float y_scale; } PC;\n");
   else
+  {
     WRITE(p, "uniform int4 position;\n");
+    WRITE(p, "uniform float y_scale;\n");
+  }
 
   // Alpha channel in the copy is set to 1 the EFB format does not have an alpha channel.
   WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n");
@@ -111,7 +114,8 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
     WRITE(p, "{\n"
              "  int2 sampleUv;\n"
              "  int2 uv1 = int2(gl_FragCoord.xy);\n"
-             "  int4 position = PC.position;\n");
+             "  int4 position = PC.position;\n"
+             "  float y_scale = PC.y_scale;\n");
   }
   else  // D3D
   {
@@ -150,6 +154,7 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
                                               // pixel)
   WRITE(p, "  uv0 += float2(position.xy);\n");                    // move to copied rect
   WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT);  // normalize to [0:1]
+  WRITE(p, "  uv0 /= float2(1, y_scale);\n");                 // apply the y scaling
   if (ApiType == APIType::OpenGL)                                 // ogl has to flip up and down
   {
     WRITE(p, "  uv0.y = 1.0-uv0.y;\n");