PixelShaderGen: Implement table-based fog range as in software renderer

2018-02-03 17:01:34 +10:00 · 2018-02-03 17:01:34 +10:00 · 340ee8fff8
parent e7d0aae5be
commit 340ee8fff8
5 changed files with 46 additions and 31 deletions
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@ -24,9 +24,10 @@ struct PixelShaderConstants
  std::array<int4, 6> indtexmtx;
  int4 fogcolor;
  int4 fogi;
-  std::array<float4, 2> fogf;
+  float4 fogf;
+  std::array<float4, 3> fogrange;
  float4 zslope;
-  std::array<float, 2> efbscale;
+  std::array<float, 2> efbscale;  // .xy
  // Constants from here onwards are only used in ubershaders.
  u32 genmode;                  // .z
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@ -397,7 +397,8 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
            "\tint4 " I_INDTEXMTX "[6];\n"
            "\tint4 " I_FOGCOLOR ";\n"
            "\tint4 " I_FOGI ";\n"
-            "\tfloat4 " I_FOGF "[2];\n"
+            "\tfloat4 " I_FOGF ";\n"
+            "\tfloat4 " I_FOGRANGE "[3];\n"
            "\tfloat4 " I_ZSLOPE ";\n"
            "\tfloat2 " I_EFBSCALE ";\n"
            "\tuint  bpmem_genmode;\n"
@ -1331,30 +1332,33 @@ static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
    // renderer)
    //       Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead?
    //       That's equivalent, but keeps the lower bits of Zs.
-    out.Write("\tfloat ze = (" I_FOGF "[1].x * 16777216.0) / float(" I_FOGI
+    out.Write("\tfloat ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
-              ".y - (zCoord >> " I_FOGI ".w));\n");
+              ".w));\n");
  }
  else
  {
    // orthographic
    // ze = a*Zs    (here, no B_SHF)
-    out.Write("\tfloat ze = " I_FOGF "[1].x * float(zCoord) / 16777216.0;\n");
+    out.Write("\tfloat ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n");
  }
  // x_adjust = sqrt((x-center)^2 + k^2)/k
  // ze *= x_adjust
-  // TODO Instead of this theoretical calculation, we should use the
-  //      coefficient table given in the fog range BP registers!
  if (uid_data->fog_RangeBaseEnabled)
  {
    out.SetConstantsUsed(C_FOGF, C_FOGF);
-    out.Write("\tfloat x_adjust = (2.0 * (rawpos.x / " I_FOGF "[0].y)) - 1.0 - " I_FOGF "[0].x;\n");
+    out.Write("\tfloat offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n");
-    out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOGF "[0].z * " I_FOGF "[0].z) / " I_FOGF
+    out.Write("\tfloat floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n");
-              "[0].z;\n");
+    out.Write("\tuint indexlower = uint(floor(floatindex));\n");
+    out.Write("\tuint indexupper = indexlower + 1u;\n");
+    out.Write("\tfloat klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n");
+    out.Write("\tfloat kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n");
+    out.Write("\tfloat k = lerp(klower, kupper, frac(floatindex));\n");
+    out.Write("\tfloat x_adjust = sqrt(offset * offset + k * k) / k;\n");
    out.Write("\tze *= x_adjust;\n");
  }
-  out.Write("\tfloat fog = clamp(ze - " I_FOGF "[1].z, 0.0, 1.0);\n");
+  out.Write("\tfloat fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n");
  if (uid_data->fog_fsel > 3)
  {
--- a/Source/Core/VideoCommon/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/PixelShaderManager.cpp
@ -110,16 +110,23 @@ void PixelShaderManager::SetConstants()
      // they are the coefficients from the center to the border of the screen
      // so to simplify I use the hi coefficient as K in the shader taking 256 as the scale
      // TODO: Shouldn't this be EFBToScaledXf?
-      constants.fogf[0][0] = ScreenSpaceCenter;
+      constants.fogf[2] = ScreenSpaceCenter;
-      constants.fogf[0][1] =
+      constants.fogf[3] =
          static_cast<float>(g_renderer->EFBToScaledX(static_cast<int>(2.0f * xfmem.viewport.wd)));
-      constants.fogf[0][2] = bpmem.fogRange.K[4].HI / 256.0f;
+
+      for (size_t i = 0, vec_index = 0; i < ArraySize(bpmem.fogRange.K); i++)
+      {
+        constexpr float scale = 4.0f;
+        constants.fogrange[vec_index / 4][vec_index % 4] = bpmem.fogRange.K[i].GetValue(0) * scale;
+        vec_index++;
+        constants.fogrange[vec_index / 4][vec_index % 4] = bpmem.fogRange.K[i].GetValue(1) * scale;
+        vec_index++;
+      }
    }
    else
    {
-      constants.fogf[0][0] = 0;
+      constants.fogf[2] = 0;
-      constants.fogf[0][1] = 1;
+      constants.fogf[3] = 1;
-      constants.fogf[0][2] = 1;
    }
    dirty = true;
@ -409,17 +416,17 @@ void PixelShaderManager::SetFogParamChanged()
 {
  if (!g_ActiveConfig.bDisableFog)
  {
-    constants.fogf[1][0] = bpmem.fog.GetA();
+    constants.fogf[0] = bpmem.fog.GetA();
+    constants.fogf[1] = bpmem.fog.GetC();
    constants.fogi[1] = bpmem.fog.b_magnitude;
-    constants.fogf[1][2] = bpmem.fog.GetC();
    constants.fogi[3] = bpmem.fog.b_shift;
    constants.fogParam3 = bpmem.fog.c_proj_fsel.hex;
  }
  else
  {
-    constants.fogf[1][0] = 0.f;
+    constants.fogf[0] = 0.f;
+    constants.fogf[1] = 0.f;
    constants.fogi[1] = 1;
-    constants.fogf[1][2] = 0.f;
    constants.fogi[3] = 1;
    constants.fogParam3 = 0;
  }
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@ -301,6 +301,7 @@ inline const char* GetInterpolationQualifier(bool msaa, bool ssaa,
 #define I_FOGCOLOR "cfogcolor"
 #define I_FOGI "cfogi"
 #define I_FOGF "cfogf"
+#define I_FOGRANGE "cfogrange"
 #define I_ZSLOPE "czslope"
 #define I_EFBSCALE "cefbscale"
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@ -1157,28 +1157,30 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
            BitfieldExtract("bpmem_fogParam3", FogParam3().proj).c_str());
  out.Write("      // perspective\n"
            "      // ze = A/(B - (Zs >> B_SHF)\n"
-            "      ze = (" I_FOGF "[1].x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
+            "      ze = (" I_FOGF ".x * 16777216.0) / float(" I_FOGI ".y - (zCoord >> " I_FOGI
            ".w));\n"
            "    } else {\n"
            "      // orthographic\n"
            "      // ze = a*Zs    (here, no B_SHF)\n"
-            "      ze = " I_FOGF "[1].x * float(zCoord) / 16777216.0;\n"
+            "      ze = " I_FOGF ".x * float(zCoord) / 16777216.0;\n"
            "    }\n"
            "\n"
            "    if (bool(%s)) {\n",
            BitfieldExtract("bpmem_fogRangeBase", FogRangeParams::RangeBase().Enabled).c_str());
  out.Write("      // x_adjust = sqrt((x-center)^2 + k^2)/k\n"
            "      // ze *= x_adjust\n"
-            "      // TODO Instead of this theoretical calculation, we should use the\n"
+            "      float offset = (2.0 * (rawpos.x / " I_FOGF ".w)) - 1.0 - " I_FOGF ".z;\n"
-            "      //      coefficient table given in the fog range BP registers!\n"
+            "      float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);\n"
-            "      float x_adjust = (2.0 * (rawpos.x / " I_FOGF "[0].y)) - 1.0 - " I_FOGF
+            "      uint indexlower = uint(floor(floatindex));\n"
-            "[0].x; \n"
+            "      uint indexupper = indexlower + 1u;\n"
-            "      x_adjust = sqrt(x_adjust * x_adjust + " I_FOGF "[0].z * " I_FOGF
+            "      float klower = " I_FOGRANGE "[indexlower >> 2u][indexlower & 3u];\n"
-            "[0].z) / " I_FOGF "[0].z;\n"
+            "      float kupper = " I_FOGRANGE "[indexupper >> 2u][indexupper & 3u];\n"
+            "      float k = lerp(klower, kupper, frac(floatindex));\n"
+            "      float x_adjust = sqrt(offset * offset + k * k) / k;\n"
            "      ze *= x_adjust;\n"
            "    }\n"
            "\n"
-            "    float fog = clamp(ze - " I_FOGF "[1].z, 0.0, 1.0);\n"
+            "    float fog = clamp(ze - " I_FOGF ".y, 0.0, 1.0);\n"
            "\n"
            "    if (fog_function > 3u) {\n"
            "      switch (fog_function) {\n"