From 340ee8fff86c3ec0d290566531fc58f9e7965b76 Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Sat, 3 Feb 2018 17:01:34 +1000
Subject: [PATCH] PixelShaderGen: Implement table-based fog range as in
 software renderer

---
 Source/Core/VideoCommon/ConstantManager.h     |  5 ++--
 Source/Core/VideoCommon/PixelShaderGen.cpp    | 24 ++++++++++-------
 .../Core/VideoCommon/PixelShaderManager.cpp   | 27 ++++++++++++-------
 Source/Core/VideoCommon/ShaderGenCommon.h     |  1 +
 Source/Core/VideoCommon/UberShaderPixel.cpp   | 20 +++++++-------
 5 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h
index 46de3c2231..490e4db4db 100644
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@@ -24,9 +24,10 @@ struct PixelShaderConstants
   std::array<int4, 6> indtexmtx;
   int4 fogcolor;
   int4 fogi;
-  std::array<float4, 2> fogf;
+  float4 fogf;
+  std::array<float4, 3> fogrange;
   float4 zslope;
-  std::array<float, 2> efbscale;
+  std::array<float, 2> efbscale;  // .xy
 
   // Constants from here onwards are only used in ubershaders.
   u32 genmode;                  // .z
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index 4cd961c1dd..5e2d8e60ad 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -397,7 +397,8 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
             "\tint4 " I_INDTEXMTX "[6];\n"
             "\tint4 " I_FOGCOLOR ";\n"
             "\tint4 " I_FOGI ";\n"
-            "\tfloat4 " I_FOGF "[2];\n"
+            "\tfloat4 " I_FOGF ";\n"
+            "\tfloat4 " I_FOGRANGE "[3];\n"
             "\tfloat4 " I_ZSLOPE ";\n"
             "\tfloat2 " I_EFBSCALE ";\n"
             "\tuint  bpmem_genmode;\n"
@@ -1331,30 +1332,33 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
     // renderer)
     //       Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead?
     //       That's equivalent, but keeps the lower bits of Zs.
-    out.Write("\tfloat ze = (" I_FOGF "[1].x * 16777216.0) / float(" I_FOGI
-              ".y - (zCoord >> " I_FOGI ".w));\n");
+    out.Write("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
+              ".w));\n");
   }
   else
   {
     // orthographic
     // ze = a*Zs    (here, no B_SHF)
-    out.Write("\tfloat ze = " I_FOGF "[1].x * float(zCoord) / 16777216.0;\n");
+    out.Write("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n");
   }
 
   // x_adjust = sqrt((x-center)^2 + k^2)/k
   // ze *= x_adjust
-  // TODO Instead of this theoretical calculation, we should use the
-  //      coefficient table given in the fog range BP registers!
   if (uid_data->fog_RangeBaseEnabled)
   {
     out.SetConstantsUsed(C_FOGF, C_FOGF);
-    out.Write("\tfloat x_adjust = (2.0 * (rawpos.x / " I_FOGF "[0].y)) - 1.0 - " I_FOGF "[0].x;\n");
-    out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOGF "[0].z * " I_FOGF "[0].z) / " I_FOGF
-              "[0].z;\n");
+    out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n");
+    out.Write("\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n");
+    out.Write("\tuint indexlower = uint(floor(floatindex));\n");
+    out.Write("\tuint indexupper = indexlower + 1u;\n");
+    out.Write("\tfloat klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n");
+    out.Write("\tfloat kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n");
+    out.Write("\tfloat k = lerp(klower, kupper, frac(floatindex));\n");
+    out.Write("\tfloat x_adjust = sqrt(offset * offset + k * k) / k;\n");
     out.Write("\tze *= x_adjust;\n");
   }
 
-  out.Write("\tfloat fog = clamp(ze - " I_FOGF "[1].z, 0.0, 1.0);\n");
+  out.Write("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n");
 
   if (uid_data->fog_fsel > 3)
   {
diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp
index 783f5e179c..3ff6f9f073 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/PixelShaderManager.cpp
@@ -110,16 +110,23 @@ void PixelShaderManager::SetConstants()
       // they are the coefficients from the center to the border of the screen
       // so to simplify I use the hi coefficient as K in the shader taking 256 as the scale
       // TODO: Shouldn't this be EFBToScaledXf?
-      constants.fogf[0][0] = ScreenSpaceCenter;
-      constants.fogf[0][1] =
+      constants.fogf[2] = ScreenSpaceCenter;
+      constants.fogf[3] =
           static_cast<float>(g_renderer->EFBToScaledX(static_cast<int>(2.0f * xfmem.viewport.wd)));
-      constants.fogf[0][2] = bpmem.fogRange.K[4].HI / 256.0f;
+
+      for (size_t i = 0, vec_index = 0; i < ArraySize(bpmem.fogRange.K); i++)
+      {
+        constexpr float scale = 4.0f;
+        constants.fogrange[vec_index / 4][vec_index % 4] = bpmem.fogRange.K[i].GetValue(0) * scale;
+        vec_index++;
+        constants.fogrange[vec_index / 4][vec_index % 4] = bpmem.fogRange.K[i].GetValue(1) * scale;
+        vec_index++;
+      }
     }
     else
     {
-      constants.fogf[0][0] = 0;
-      constants.fogf[0][1] = 1;
-      constants.fogf[0][2] = 1;
+      constants.fogf[2] = 0;
+      constants.fogf[3] = 1;
     }
     dirty = true;
 
@@ -409,17 +416,17 @@ void PixelShaderManager::SetFogParamChanged()
 {
   if (!g_ActiveConfig.bDisableFog)
   {
-    constants.fogf[1][0] = bpmem.fog.GetA();
+    constants.fogf[0] = bpmem.fog.GetA();
+    constants.fogf[1] = bpmem.fog.GetC();
     constants.fogi[1] = bpmem.fog.b_magnitude;
-    constants.fogf[1][2] = bpmem.fog.GetC();
     constants.fogi[3] = bpmem.fog.b_shift;
     constants.fogParam3 = bpmem.fog.c_proj_fsel.hex;
   }
   else
   {
-    constants.fogf[1][0] = 0.f;
+    constants.fogf[0] = 0.f;
+    constants.fogf[1] = 0.f;
     constants.fogi[1] = 1;
-    constants.fogf[1][2] = 0.f;
     constants.fogi[3] = 1;
     constants.fogParam3 = 0;
   }
diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h
index 98a30faa88..5aa391c412 100644
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@@ -301,6 +301,7 @@ inline const char* GetInterpolationQualifier(bool msaa, bool ssaa,
 #define I_FOGCOLOR "cfogcolor"
 #define I_FOGI "cfogi"
 #define I_FOGF "cfogf"
+#define I_FOGRANGE "cfogrange"
 #define I_ZSLOPE "czslope"
 #define I_EFBSCALE "cefbscale"
 
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index a0e01febe8..b87082b757 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -1157,28 +1157,30 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
             BitfieldExtract("bpmem_fogParam3", FogParam3().proj).c_str());
   out.Write("      // perspective\n"
             "      // ze = A/(B - (Zs >> B_SHF)\n"
-            "      ze = (" I_FOGF "[1].x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
+            "      ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
             ".w));\n"
             "    } else {\n"
             "      // orthographic\n"
             "      // ze = a*Zs    (here, no B_SHF)\n"
-            "      ze = " I_FOGF "[1].x * float(zCoord) / 16777216.0;\n"
+            "      ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n"
             "    }\n"
             "\n"
             "    if (bool(%s)) {\n",
             BitfieldExtract("bpmem_fogRangeBase", FogRangeParams::RangeBase().Enabled).c_str());
   out.Write("      // x_adjust = sqrt((x-center)^2 + k^2)/k\n"
             "      // ze *= x_adjust\n"
-            "      // TODO Instead of this theoretical calculation, we should use the\n"
-            "      //      coefficient table given in the fog range BP registers!\n"
-            "      float x_adjust = (2.0 * (rawpos.x / " I_FOGF "[0].y)) - 1.0 - " I_FOGF
-            "[0].x; \n"
-            "      x_adjust = sqrt(x_adjust * x_adjust + " I_FOGF "[0].z * " I_FOGF
-            "[0].z) / " I_FOGF "[0].z;\n"
+            "      float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
+            "      float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
+            "      uint indexlower = uint(floor(floatindex));\n"
+            "      uint indexupper = indexlower + 1u;\n"
+            "      float klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n"
+            "      float kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n"
+            "      float k = lerp(klower, kupper, frac(floatindex));\n"
+            "      float x_adjust = sqrt(offset * offset + k * k) / k;\n"
             "      ze *= x_adjust;\n"
             "    }\n"
             "\n"
-            "    float fog = clamp(ze - " I_FOGF "[1].z, 0.0, 1.0);\n"
+            "    float fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n"
             "\n"
             "    if (fog_function > 3u) {\n"
             "      switch (fog_function) {\n"