Merge pull request #10864 from TellowKrinkle/BetterLogicBlend

VideoCommon: Better logic op invert approximation
2023-01-31 01:55:55 -05:00 · 2023-01-31 01:55:55 -05:00 · 0f037a1af8
parent be8cbe3c66 600ad5f498
commit 0f037a1af8
6 changed files with 139 additions and 33 deletions
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@ -744,6 +744,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
                           bool per_pixel_depth, bool use_dual_source);
 static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
 static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data);
+static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
 static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
                       bool use_dual_source);
 static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
@ -1148,6 +1149,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos

  if (uid_data->logic_op_enable)
    WriteLogicOp(out, uid_data);
+  else if (uid_data->emulate_logic_op_with_blend)
+    WriteLogicOpBlend(out, uid_data);

  // Write the color and alpha values to the framebuffer
  // If using shader blend, we still use the separate alpha
@ -1803,6 +1806,29 @@ static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data)
  out.Write("\tprev = ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]);
 }

+static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+{
+  switch (static_cast<LogicOp>(uid_data->logic_op_mode))  // Shader-side half of blend emulation
+  {
+  case LogicOp::Clear:
+  case LogicOp::NoOp:  // Both modes make the emitted shader code set prev to 0
+    out.Write("\tprev = int4(0, 0, 0, 0);\n");
+    break;
+  case LogicOp::Copy:
+    // Do nothing! No code is emitted, so prev is left as computed.
+    break;
+  case LogicOp::CopyInverted:  // Emitted code XORs prev with 255
+    out.Write("\tprev ^= 255;\n");
+    break;
+  case LogicOp::Set:
+  case LogicOp::Invert:  // In cooperation with blend
+    out.Write("\tprev = int4(255, 255, 255, 255);\n");
+    break;
+  default:  // Remaining modes get no shader-side adjustment here
+    break;
+  }
+}
+
 static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
                       bool use_dual_source)
 {
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@ -58,8 +58,9 @@ struct pixel_shader_uid_data
  DstBlendFactor blend_dst_factor_alpha : 3;  // Only used with shader_framebuffer_fetch blend
  u32 blend_subtract : 1;                     // Only used with shader_framebuffer_fetch blend
  u32 blend_subtract_alpha : 1;               // Only used with shader_framebuffer_fetch blend
+  u32 emulate_logic_op_with_blend : 1;        // Only used with logic op blend emulation
  u32 logic_op_enable : 1;                    // Only used with shader_framebuffer_fetch logic ops
-  u32 logic_op_mode : 4;                      // Only used with shader_framebuffer_fetch logic ops
+  u32 logic_op_mode : 4;  // Only used with shader_framebuffer_fetch logic ops and blend emulation

  u32 texMtxInfo_n_projection : 8;  // 8x1 bit
  u32 tevindref_bi0 : 3;
--- a/Source/Core/VideoCommon/RenderState.cpp
+++ b/Source/Core/VideoCommon/RenderState.cpp
@ -185,6 +185,7 @@ void BlendingState::ApproximateLogicOpWithBlending()
 {
  struct LogicOpApproximation
  {
+    bool blendEnable;
    bool subtract;
    SrcBlendFactor srcfactor;
    DstBlendFactor dstfactor;
@ -193,31 +194,69 @@ void BlendingState::ApproximateLogicOpWithBlending()
  // but INVSRCCLR and INVDSTCLR were also aliased and were mixed.
  // Thus, NOR, EQUIV, INVERT, COPY_INVERTED, and OR_INVERTED duplicate(d) other values.
  static constexpr std::array<LogicOpApproximation, 16> approximations = {{
-      {false, SrcBlendFactor::Zero, DstBlendFactor::Zero},            // CLEAR
-      {false, SrcBlendFactor::DstClr, DstBlendFactor::Zero},          // AND
-      {true, SrcBlendFactor::One, DstBlendFactor::InvSrcClr},         // AND_REVERSE
-      {false, SrcBlendFactor::One, DstBlendFactor::Zero},             // COPY
-      {true, SrcBlendFactor::DstClr, DstBlendFactor::One},            // AND_INVERTED
-      {false, SrcBlendFactor::Zero, DstBlendFactor::One},             // NOOP
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // XOR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},        // OR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // NOR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},       // EQUIV
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // INVERT
-      {false, SrcBlendFactor::One, DstBlendFactor::InvDstAlpha},      // OR_REVERSE
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // COPY_INVERTED
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},        // OR_INVERTED
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // NAND
-      {false, SrcBlendFactor::One, DstBlendFactor::One},              // SET
+      // clang-format off
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},        // CLEAR (Shader outputs 0)
+      {true,  false, SrcBlendFactor::DstClr,    DstBlendFactor::Zero},        // AND
+      {true,  true,  SrcBlendFactor::One,       DstBlendFactor::InvSrcClr},   // AND_REVERSE
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},        // COPY
+      {true,  true,  SrcBlendFactor::DstClr,    DstBlendFactor::One},         // AND_INVERTED
+      {true,  false, SrcBlendFactor::Zero,      DstBlendFactor::One},         // NOOP
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},   // XOR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},         // OR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},   // NOR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},        // EQUIV
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},        // INVERT (Shader outputs 255)
+      {true,  false, SrcBlendFactor::One,       DstBlendFactor::InvDstAlpha}, // OR_REVERSE
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},        // COPY_INVERTED (Shader inverts)
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},         // OR_INVERTED
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},   // NAND
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},        // SET (Shader outputs 255)
+      // clang-format on
  }};

  logicopenable = false;
-  blendenable = true;
-  subtract = approximations[u32(logicmode.Value())].subtract;
-  srcfactor = approximations[u32(logicmode.Value())].srcfactor;
-  srcfactoralpha = approximations[u32(logicmode.Value())].srcfactor;
-  dstfactor = approximations[u32(logicmode.Value())].dstfactor;
-  dstfactoralpha = approximations[u32(logicmode.Value())].dstfactor;
+  usedualsrc = false;
+  const LogicOpApproximation& approximation = approximations[static_cast<u32>(logicmode.Value())];
+  if (approximation.blendEnable)
+  {
+    blendenable = true;
+    subtract = approximation.subtract;
+    srcfactor = approximation.srcfactor;
+    srcfactoralpha = approximation.srcfactor;
+    dstfactor = approximation.dstfactor;
+    dstfactoralpha = approximation.dstfactor;
+  }
+}
+
+bool BlendingState::LogicOpApproximationIsExact()
+{
+  switch (logicmode.Value())  // Decided purely by the current logic op mode
+  {
+  case LogicOp::Clear:
+  case LogicOp::Set:
+  case LogicOp::NoOp:
+  case LogicOp::Invert:
+  case LogicOp::CopyInverted:
+  case LogicOp::Copy:
+    return true;  // Approximation is reported as exact for the six modes above
+  default:
+    return false;  // Every other mode is reported as inexact
+  }
+}
+
+bool BlendingState::LogicOpApproximationWantsShaderHelp()
+{
+  switch (logicmode.Value())  // Decided purely by the current logic op mode
+  {
+  case LogicOp::Clear:
+  case LogicOp::Set:
+  case LogicOp::NoOp:
+  case LogicOp::Invert:
+  case LogicOp::CopyInverted:
+    return true;  // These five modes ask for pixel shader assistance
+  default:
+    return false;  // Everything else, including Copy, asks for no shader assistance
+  }
+}

 void SamplerState::Generate(const BPMemory& bp, u32 index)
--- a/Source/Core/VideoCommon/RenderState.h
+++ b/Source/Core/VideoCommon/RenderState.h
@ -109,6 +109,8 @@ union BlendingState
  // HACK: Replaces logical operations with blend operations.
  // Will not be bit-correct, and in some cases not even remotely in the same ballpark.
  void ApproximateLogicOpWithBlending();
+  bool LogicOpApproximationIsExact();
+  bool LogicOpApproximationWantsShaderHelp();

  BlendingState() = default;
  BlendingState(const BlendingState&) = default;
--- a/Source/Core/VideoCommon/ShaderCache.cpp
+++ b/Source/Core/VideoCommon/ShaderCache.cpp
@ -601,16 +601,6 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
  config.depth_state = depth_state;
  config.blending_state = blending_state;
  config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState();
-
-  // We can use framebuffer fetch to emulate logic ops in the fragment shader.
-  if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
-      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
-  {
-    WARN_LOG_FMT(VIDEO,
-                 "Approximating logic op with blending, this will produce incorrect rendering.");
-    config.blending_state.ApproximateLogicOpWithBlending();
-  }
-
  return config;
 }

@ -628,6 +618,22 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
    ps->ztest = EmulatedZ::Early;
  }

+  // If framebuffer fetch is available, we can emulate logic ops in the fragment shader
+  // and don't need the below blend approximation
+  if (blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
+      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
+  {
+    if (!blend.LogicOpApproximationIsExact())
+      WARN_LOG_FMT(VIDEO,
+                   "Approximating logic op with blending, this will produce incorrect rendering.");
+    if (blend.LogicOpApproximationWantsShaderHelp())
+    {
+      ps->emulate_logic_op_with_blend = true;
+      ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
+    }
+    blend.ApproximateLogicOpWithBlending();
+  }
+
  const bool benefits_from_ps_dual_source_off =
      (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
       g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
@ -775,6 +781,18 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
  memcpy(&out, &in, sizeof(out));  // Copy padding
  if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
    out.vertex_format = nullptr;
+
+  // If framebuffer fetch is available, we can emulate logic ops in the fragment shader
+  // and don't need the below blend approximation
+  if (out.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
+      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
+  {
+    if (!out.blending_state.LogicOpApproximationIsExact())
+      WARN_LOG_FMT(VIDEO,
+                   "Approximating logic op with blending, this will produce incorrect rendering.");
+    out.blending_state.ApproximateLogicOpWithBlending();
+  }
+
  if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
  {
    // Always blend in shader
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@ -1092,6 +1092,26 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
              "    TevResult &= 0xff;\n"
              "  }}\n");
  }
+  else if (!host_config.backend_logic_op)
+  {
+    out.Write("  // Helpers for logic op blending approximations\n"
+              "  if (logic_op_enable) {{\n"
+              "    switch (logic_op_mode) {{\n");
+    out.Write("      case {}: // Clear\n", static_cast<u32>(LogicOp::Clear));
+    out.Write("        TevResult = int4(0, 0, 0, 0);\n"
+              "        break;\n");
+    out.Write("      case {}: // Copy Inverted\n", static_cast<u32>(LogicOp::CopyInverted));
+    out.Write("        TevResult ^= 0xff;\n"
+              "        break;\n");
+    out.Write("      case {}: // Set\n", static_cast<u32>(LogicOp::Set));
+    out.Write("      case {}: // Invert\n", static_cast<u32>(LogicOp::Invert));
+    out.Write("        TevResult = int4(255, 255, 255, 255);\n"
+              "        break;\n");
+    out.Write("      default:\n"
+              "        break;\n"
+              "    }}\n"
+              "  }}\n");
+  }

  // Some backends require that the shader outputs be uint when writing to a uint render target for
  // logic op.