Merge pull request #6936 from stenzek/copy-filter-depth-fix

EFB2RAM: Apply copy filter as a float coefficient after sampling
2018-05-22 13:52:26 +10:00 · 2018-05-22 13:52:26 +10:00 · 7eaba154a4
parent f64bd401b6 f74dbc794c
commit 7eaba154a4
10 changed files with 38 additions and 35 deletions
--- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
+++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp
@@ -34,7 +34,7 @@ struct EFBEncodeParams
  float gamma_rcp;
  float clamp_top;
  float clamp_bottom;
-  s32 filter_coefficients[3];
+  float filter_coefficients[3];
  u32 padding;
 };

@@ -169,4 +169,4 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams&
  m_encoding_shaders.emplace(params, newShader);
  return newShader;
 }
-}
+}  // namespace DX11
--- a/Source/Core/VideoBackends/D3D/TextureCache.cpp
+++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp
@@ -276,7 +276,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
  };
  PixelConstants constants;
  for (size_t i = 0; i < filter_coefficients.size(); i++)
-    constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
+    constants.filter_coefficients[i] = filter_coefficients[i];
  constants.gamma_rcp = 1.0f / gamma;
  constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
  constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
@@ -315,4 +315,4 @@ TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderU
  m_efb_to_tex_pixel_shaders.emplace(uid, shader);
  return shader;
 }
-}
+}  // namespace DX11
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -558,12 +558,12 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
  glUniform2f(shader.clamp_tb_uniform,
              clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f,
              clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f);
-  glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f,
-              filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f);
+  glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1],
+              filter_coefficients[2]);

  ProgramShaderCache::BindVertexFormat(nullptr);
  glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

  g_renderer->RestoreAPIState();
 }
-}
+}  // namespace OGL
--- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
@@ -50,7 +50,7 @@ std::unique_ptr<AbstractStagingTexture> s_encoding_readback_texture;

 const int renderBufferWidth = EFB_WIDTH * 4;
 const int renderBufferHeight = 1024;
-}
+}  // namespace

 static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
 {
@@ -158,7 +158,7 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
  glUniform1f(texconv_shader.y_scale_uniform, y_scale);
  glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma);
  glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom);
-  glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
+  glUniform3f(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
              filter_coefficients[1], filter_coefficients[2]);

  const GLuint read_texture = params.depth ?
@@ -171,6 +171,6 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
  g_renderer->RestoreAPIState();
 }

-}  // namespace
+}  // namespace TextureConverter

 }  // namespace OGL
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
@@ -244,7 +244,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
  };
  PixelUniforms uniforms;
  for (size_t i = 0; i < filter_coefficients.size(); i++)
-    uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f;
+    uniforms.filter_coefficients[i] = filter_coefficients[i];
  uniforms.gamma_rcp = 1.0f / gamma;
  uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
  uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
@@ -41,10 +41,10 @@ struct EFBEncodeParams
  float gamma_rcp;
  float clamp_top;
  float clamp_bottom;
-  s32 filter_coefficients[3];
+  float filter_coefficients[3];
  u32 padding;
 };
-}
+}  // namespace
 TextureConverter::TextureConverter()
 {
 }
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -1506,10 +1506,14 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC
 {
  // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
  // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
-  return {static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
-          static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
-              static_cast<u32>(coefficients[4]),
-          static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
+  return {
+      static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
+          64.0f,
+      static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
+                         static_cast<u32>(coefficients[4])) /
+          64.0f,
+      static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
+          64.0f};
 }

 TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -109,7 +109,7 @@ private:

 public:
  // Reduced version of the full coefficient array, reduced to a single value for each row.
-  using CopyFilterCoefficientArray = std::array<u32, 3>;
+  using CopyFilterCoefficientArray = std::array<float, 3>;

  struct TCacheEntry
  {
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -67,7 +67,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "uniform float y_scale;\n");
    WRITE(p, "uniform float gamma_rcp;\n");
    WRITE(p, "uniform float2 clamp_tb;\n");
-    WRITE(p, "uniform int3 filter_coefficients;\n");
+    WRITE(p, "uniform float3 filter_coefficients;\n");
    WRITE(p, "#define samp0 samp9\n");
    WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@@ -79,7 +79,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "  float y_scale;\n");
    WRITE(p, "  float gamma_rcp;\n");
    WRITE(p, "  float2 clamp_tb;\n");
-    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "  float3 filter_coefficients;\n");
    WRITE(p, "};\n");
    WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@@ -91,7 +91,7 @@ static void WriteHeader(char*& p, APIType ApiType)
    WRITE(p, "  float y_scale;\n");
    WRITE(p, "  float gamma_rcp;\n");
    WRITE(p, "  float2 clamp_tb;\n");
-    WRITE(p, "  int3 filter_coefficients;\n");
+    WRITE(p, "  float3 filter_coefficients;\n");
    WRITE(p, "};\n");
    WRITE(p, "sampler samp0 : register(s0);\n");
    WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
@@ -191,21 +191,18 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
    WRITE(p, "  float4 next_row = ");
    WriteSampleOp(1);
    WRITE(p, ";\n");
-    WRITE(
-        p,
-        "  float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
-        "                             int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
-        "                             int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
-        "                            int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
-    WRITE(p, "  return float4(col, current_row.a);\n");
+    WRITE(p, "  return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
+             "                      current_row.rgb * filter_coefficients[1] +\n"
+             "                      next_row.rgb * filter_coefficients[2], \n"
+             "                    float3(1, 1, 1)), current_row.a);\n");
  }
  else
  {
    WRITE(p, "  float4 current_row = ");
    WriteSampleOp(0);
    WRITE(p, ";\n");
-    WRITE(p, "  return float4(clamp(int3(current_row.rgb * 255.0) * filter_coefficients[1], "
-             "int3(0, 0, 0), int3(255, 255, 255)), current_row.a);\n");
+    WRITE(p, "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
+             "              current_row.a);\n");
  }
  WRITE(p, "}\n");
 }
@@ -1422,4 +1419,4 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form
  return ss.str();
 }

-}  // namespace
+}  // namespace TextureConversionShaderTiled
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
@@ -97,15 +97,17 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
    out.Write("  float4 prev_row = SampleEFB(uv0, -1.0f);\n"
              "  float4 current_row = SampleEFB(uv0, 0.0f);\n"
              "  float4 next_row = SampleEFB(uv0, 1.0f);\n"
-              "  float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
-              "                         current_row.rgb * filter_coefficients[1] +\n"
-              "                         next_row.rgb * filter_coefficients[2], current_row.a);\n");
+              "  float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
+              "                               current_row.rgb * filter_coefficients[1] +\n"
+              "                               next_row.rgb * filter_coefficients[2], \n"
+              "                             float3(1, 1, 1)), current_row.a);\n");
  }
  else
  {
    out.Write(
        "  float4 current_row = SampleEFB(uv0, 0.0f);\n"
-        "  float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
+        "  float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
+        "                         current_row.a);\n");
  }

  if (uid_data->is_depth_copy)