Merge pull request #4085 from Armada651/vertex-depth

VideoCommon: Implement depth range equation in vertex shader.
2016-08-19 02:11:37 +02:00 · 2016-08-19 02:11:37 +02:00 · 40f4308dc2
parent 0015d2e86b e86d7cbc99
commit 40f4308dc2
17 changed files with 135 additions and 40 deletions
--- a/Source/Core/VideoBackends/D3D/D3DState.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DState.cpp
@ -440,7 +440,7 @@ ID3D11RasterizerState* StateCache::Get(RasterizerState state)
    return it->second;

  D3D11_RASTERIZER_DESC rastdc = CD3D11_RASTERIZER_DESC(D3D11_FILL_SOLID, state.cull_mode, false, 0,
-                                                        0.f, 0, true, true, false, false);
+                                                        0.f, 0, false, true, false, false);

  ID3D11RasterizerState* res = nullptr;

--- a/Source/Core/VideoBackends/D3D/Render.cpp
+++ b/Source/Core/VideoBackends/D3D/Render.cpp
@ -578,14 +578,10 @@ void Renderer::SetViewport()
  Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X);
  Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y);

-  D3D11_VIEWPORT vp = CD3D11_VIEWPORT(
-      X, Y, Wd, Ht,
-      1.0f - MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f,
-      1.0f -
-          MathUtil::Clamp<float>(xfmem.viewport.farZ - MathUtil::Clamp<float>(xfmem.viewport.zRange,
-                                                                              0.0f, 16777216.0f),
-                                 0.0f, 16777215.0f) /
-              16777216.0f);
+  // We do depth clipping and depth range in the vertex shader instead of relying
+  // on the graphics API. However we still need to ensure depth values don't exceed
+  // the maximum value supported by the console GPU.
+  D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht, D3D11_MIN_DEPTH, GX_MAX_DEPTH);
  D3D::context->RSSetViewports(1, &vp);
 }

--- a/Source/Core/VideoBackends/D3D/main.cpp
+++ b/Source/Core/VideoBackends/D3D/main.cpp
@ -71,6 +71,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPostProcessing = false;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
+  g_Config.backend_info.bSupportsDepthClamp = true;

  IDXGIFactory* factory;
  IDXGIAdapter* ad;
--- a/Source/Core/VideoBackends/D3D12/D3DState.cpp
+++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp
@ -329,7 +329,7 @@ D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state)
          0,
          0.f,
          0,
-          true,
+          false,
          true,
          false,
          0,
--- a/Source/Core/VideoBackends/D3D12/Render.cpp
+++ b/Source/Core/VideoBackends/D3D12/Render.cpp
@ -482,15 +482,10 @@ void Renderer::SetViewport()
  width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x);
  height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y);

-  D3D12_VIEWPORT vp = {
-      x, y, width, height,
-      1.0f - MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f,
-      1.0f -
-          MathUtil::Clamp<float>(xfmem.viewport.farZ - MathUtil::Clamp<float>(xfmem.viewport.zRange,
-                                                                              0.0f, 16777216.0f),
-                                 0.0f, 16777215.0f) /
-              16777216.0f};
-
+  // We do depth clipping and depth range in the vertex shader instead of relying
+  // on the graphics API. However we still need to ensure depth values don't exceed
+  // the maximum value supported by the console GPU.
+  D3D12_VIEWPORT vp = {x, y, width, height, D3D12_MIN_DEPTH, GX_MAX_DEPTH};
  D3D::current_command_list->RSSetViewports(1, &vp);
 }

--- a/Source/Core/VideoBackends/D3D12/main.cpp
+++ b/Source/Core/VideoBackends/D3D12/main.cpp
@ -74,6 +74,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPostProcessing = false;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
+  g_Config.backend_info.bSupportsDepthClamp = true;

  IDXGIFactory* factory;
  IDXGIAdapter* ad;
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@ -481,6 +481,9 @@ Renderer::Renderer()
  g_Config.backend_info.bSupportsBindingLayout =
      GLExtensions::Supports("GL_ARB_shading_language_420pack");

+  // Clip distance support is useless without a method to clamp the depth range
+  g_Config.backend_info.bSupportsDepthClamp = GLExtensions::Supports("GL_ARB_depth_clamp");
+
  g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary");
  g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory");
  g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync");
@ -520,6 +523,10 @@ Renderer::Renderer()
    g_ogl_config.bSupportsGLSLCache = true;
    g_ogl_config.bSupportsGLSync = true;

+    // TODO: Implement support for GL_EXT_clip_cull_distance when there is an extension for
+    // depth clamping.
+    g_Config.backend_info.bSupportsDepthClamp = false;
+
    if (strstr(g_ogl_config.glsl_version, "3.0"))
    {
      g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
@ -669,7 +676,7 @@ Renderer::Renderer()
                                   g_ogl_config.gl_renderer, g_ogl_config.gl_version),
                  5000);

-  WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s",
+  WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s%s",
           g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ",
           g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ",
           g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ",
@ -681,7 +688,8 @@ Renderer::Renderer()
           g_ActiveConfig.backend_info.bSupportsSSAA ? "" : "SSAA ",
           g_ActiveConfig.backend_info.bSupportsGSInstancing ? "" : "GSInstancing ",
           g_ActiveConfig.backend_info.bSupportsClipControl ? "" : "ClipControl ",
-           g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData ");
+           g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData ",
+           g_ActiveConfig.backend_info.bSupportsDepthClamp ? "" : "DepthClamp ");

  s_last_multisamples = g_ActiveConfig.iMultisamples;
  s_MSAASamples = s_last_multisamples;
@ -724,6 +732,12 @@ Renderer::Renderer()
  glClearDepthf(1.0f);
  glEnable(GL_DEPTH_TEST);
  glDepthFunc(GL_LEQUAL);
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+  {
+    glEnable(GL_CLIP_DISTANCE0);
+    glEnable(GL_CLIP_DISTANCE1);
+    glEnable(GL_DEPTH_CLAMP);
+  }

  glPixelStorei(GL_UNPACK_ALIGNMENT, 4);  // 4-byte pixel alignment

@ -1142,7 +1156,15 @@ void Renderer::SetViewport()
    auto iceilf = [](float f) { return static_cast<GLint>(ceilf(f)); };
    glViewport(iceilf(X), iceilf(Y), iceilf(Width), iceilf(Height));
  }
-  glDepthRangef(GLFar, GLNear);
+
+  // Set the reversed depth range. If we do depth clipping and depth range in the
+  // vertex shader we only need to ensure depth values don't exceed the maximum
+  // value supported by the console GPU. If not, we simply clamp the near/far values
+  // themselves to the maximum value as done above.
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+    glDepthRangef(GX_MAX_DEPTH, 0.0f);
+  else
+    glDepthRangef(GLFar, GLNear);
 }

 void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,
@ -1647,6 +1669,11 @@ void Renderer::ResetAPIState()
  glDisable(GL_BLEND);
  if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
    glDisable(GL_COLOR_LOGIC_OP);
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+  {
+    glDisable(GL_CLIP_DISTANCE0);
+    glDisable(GL_CLIP_DISTANCE1);
+  }
  glDepthMask(GL_FALSE);
  glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
 }
@ -1655,6 +1682,11 @@ void Renderer::RestoreAPIState()
 {
  // Gets us back into a more game-like state.
  glEnable(GL_SCISSOR_TEST);
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+  {
+    glEnable(GL_CLIP_DISTANCE0);
+    glEnable(GL_CLIP_DISTANCE1);
+  }
  SetGenerationMode();
  BPFunctions::SetScissor();
  SetColorMask();
--- a/Source/Core/VideoBackends/OGL/main.cpp
+++ b/Source/Core/VideoBackends/OGL/main.cpp
@ -108,6 +108,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPrimitiveRestart = true;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
+  g_Config.backend_info.bSupportsDepthClamp = true;

  g_Config.backend_info.Adapters.clear();

--- a/Source/Core/VideoCommon/DriverDetails.cpp
+++ b/Source/Core/VideoCommon/DriverDetails.cpp
@ -74,6 +74,7 @@ static BugInfo m_known_bugs[] = {
    {OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE,
     101810.3907, 101810.3960, true},
    {OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
+    {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENCLIPDISTANCE, -1.0, -1.0, true},
 };

 static std::map<Bug, BugInfo> m_bugs;
--- a/Source/Core/VideoCommon/DriverDetails.h
+++ b/Source/Core/VideoCommon/DriverDetails.h
@ -204,6 +204,15 @@ enum Bug
  // GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData
  // everywhere else.
  BUG_SLOWGETBUFFERSUBDATA,
+
+  // Bug: Broken lines in geometry shaders when writing to gl_ClipDistance in the vertex shader
+  // Affected Devices: Mesa i965
+  // Started Version: -1
+  // Ended Version: -1
+  // Writing to gl_ClipDistance in both the vertex shader and the geometry shader will break
+  // the geometry shader. Current workaround is to make sure the geometry shader always consumes
+  // the gl_ClipDistance inputs from the vertex shader.
+  BUG_BROKENCLIPDISTANCE,
 };

 // Initializes our internal vendor, device family, and driver version
--- a/Source/Core/VideoCommon/GeometryShaderGen.cpp
+++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp
@ -7,6 +7,7 @@

 #include "Common/CommonTypes.h"
 #include "VideoCommon/BPMemory.h"
+#include "VideoCommon/DriverDetails.h"
 #include "VideoCommon/GeometryShaderGen.h"
 #include "VideoCommon/LightingShaderGen.h"
 #include "VideoCommon/VideoCommon.h"
@ -211,6 +212,15 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid
  {
    out.Write("\tVS_OUTPUT f;\n");
    AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting);
+
+    if (g_ActiveConfig.backend_info.bSupportsDepthClamp &&
+        DriverDetails::HasBug(DriverDetails::BUG_BROKENCLIPDISTANCE))
+    {
+      // On certain GPUs we have to consume the clip distance from the vertex shader
+      // or else the other vertex shader outputs will get corrupted.
+      out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n");
+      out.Write("\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
+    }
  }
  else
  {
@ -312,6 +322,11 @@ static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data
  if (ApiType == APIType::OpenGL)
  {
    out.Write("\tgl_Position = %s.pos;\n", vertex);
+    if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+    {
+      out.Write("\tgl_ClipDistance[0] = %s.clipDist0;\n", vertex);
+      out.Write("\tgl_ClipDistance[1] = %s.clipDist1;\n", vertex);
+    }
    AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting);
  }
  else
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@ -86,6 +86,12 @@ unsigned int Renderer::efb_scale_numeratorY = 1;
 unsigned int Renderer::efb_scale_denominatorX = 1;
 unsigned int Renderer::efb_scale_denominatorY = 1;

+// The maximum depth that is written to the depth buffer should never exceed this value.
+// This is necessary because we use a 2^24 divisor for all our depth values to prevent
+// floating-point round-trip errors. However the console GPU doesn't ever write a value
+// to the depth buffer that exceeds 2^24 - 1.
+const float Renderer::GX_MAX_DEPTH = 16777215.0f / 16777216.0f;
+
 static float AspectToWidescreen(float aspect)
 {
  return aspect * ((16.0f / 9.0f) / (4.0f / 3.0f));
--- a/Source/Core/VideoCommon/RenderBase.h
+++ b/Source/Core/VideoCommon/RenderBase.h
@ -176,6 +176,8 @@ protected:

  static std::unique_ptr<PostProcessingShaderImplementation> m_post_processor;

+  static const float GX_MAX_DEPTH;
+
 private:
  static PEControl::PixelFormat prev_efb_format;
  static unsigned int efb_scale_numeratorX;
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@ -196,6 +196,9 @@ inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens,
    DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD",
                       texgens + 2);
  }
+
+  DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, "SV_ClipDistance", 0);
+  DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, "SV_ClipDistance", 1);
 }

 template <class T>
@ -216,6 +219,9 @@ inline void AssignVSOutputMembers(T& object, const char* a, const char* b, u32 t
    object.Write("\t%s.Normal = %s.Normal;\n", a, b);
    object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b);
  }
+
+  object.Write("\t%s.clipDist0 = %s.clipDist0;\n", a, b);
+  object.Write("\t%s.clipDist1 = %s.clipDist1;\n", a, b);
 }

 // We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@ -399,28 +399,43 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
      out.Write("o.colors_1 = color1;\n");
  }

-  // write the true depth value, if the game uses depth textures pixel shaders will override with
-  // the correct values
-  // if not early z culling will improve speed
-  if (g_ActiveConfig.backend_info.bSupportsClipControl)
+  // Write the true depth value. If the game uses depth textures, then the pixel shader will
+  // override it with the correct values; if not, then early z culling will improve speed.
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  {
+    // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
+    // our own depth clipping and calculate the depth range before the perspective divide.
+
+    // Since we're adjusting z for the depth range before the perspective divide, we have to do our
+    // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
+    out.Write("o.clipDist0 = o.pos.z + o.pos.w;\n");  // Near: z < -w
+    out.Write("o.clipDist1 = -o.pos.z;\n");           // Far: z > 0
+
+    // Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
+    // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
+    // We have to handle the depth range in the vertex shader instead of after the perspective
+    // divide, because some games will use a depth range larger than what is allowed by the
+    // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
+    // games effectively add a depth bias to the values written to the depth buffer.
+    out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
+              "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
+  }
+  else
+  {
+    // If we can't disable the incorrect depth clipping planes, then we need to rely on the
+    // graphics API to handle the depth range after the perspective divide. This can result in
+    // inaccurate depth values due to the missing depth bias, but that can at least be corrected by
+    // overriding depth values in the pixel shader. We still need to take care of the reversed depth
+    // though, so we do that here.
    out.Write("o.pos.z = -o.pos.z;\n");
  }
-  else  // OGL
+
+  if (!g_ActiveConfig.backend_info.bSupportsClipControl)
  {
-    // this results in a scale from -1..0 to -1..1 after perspective
-    // divide
-    out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");
-
-    // the next steps of the OGL pipeline are:
-    // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
-    // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
-    // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
-    // z_w = (f-n)/2*z_d + (n+f)/2
-    // z_w now contains the value to go to the 0..1 depth buffer
-
-    // trying to get the correct semantic while not using glDepthRange
-    // seems to get rather complicated
+    // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
+    // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
+    // operation that can introduce a round-trip error.
+    out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
  }

  // The console GPU places the pixel center at 7/12 in screen space unless
@ -453,6 +468,11 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
      out.Write("colors_1 = o.colors_1;\n");
    }

+    if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
+    {
+      out.Write("gl_ClipDistance[0] = o.clipDist0;\n");
+      out.Write("gl_ClipDistance[1] = o.clipDist1;\n");
+    }
    out.Write("gl_Position = o.pos;\n");
  }
  else  // D3D
--- a/Source/Core/VideoCommon/VertexShaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexShaderManager.cpp
@ -386,6 +386,15 @@ void VertexShaderManager::SetConstants()
    const float pixel_size_y = 2.f / Renderer::EFBToScaledXf(2.f * xfmem.viewport.ht);
    constants.pixelcentercorrection[0] = pixel_center_correction * pixel_size_x;
    constants.pixelcentercorrection[1] = pixel_center_correction * pixel_size_y;
+
+    // The depth range is handled in the vertex shader. We need to reverse
+    // the far value to get a reversed depth range mapping. This is necessary
+    // because the standard depth range equation pushes all depth values towards
+    // the back of the depth buffer where conventionally depth buffers have the
+    // least precision.
+    constants.pixelcentercorrection[2] = xfmem.viewport.zRange / 16777215.0f;
+    constants.pixelcentercorrection[3] = 1.0f - xfmem.viewport.farZ / 16777215.0f;
+
    dirty = true;
    // This is so implementation-dependent that we can't have it here.
    g_renderer->SetViewport();
--- a/Source/Core/VideoCommon/VideoConfig.h
+++ b/Source/Core/VideoCommon/VideoConfig.h
@ -171,6 +171,7 @@ struct VideoConfig final
    bool bSupportsPaletteConversion;
    bool bSupportsClipControl;  // Needed by VertexShaderGen, so must stay in VideoCommon
    bool bSupportsSSAA;
+    bool bSupportsDepthClamp;  // Needed by VertexShaderGen, so must stay in VideoCommon
  } backend_info;

  // Utility