Merge pull request #4085 from Armada651/vertex-depth

VideoCommon: Implement depth range equation in vertex shader.
2016-08-19 02:11:37 +02:00 · 2016-08-19 02:11:37 +02:00 · 40f4308dc2
parent 0015d2e86b e86d7cbc99
commit 40f4308dc2
17 changed files with 135 additions and 40 deletions
--- a/Source/Core/VideoBackends/D3D/D3DState.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DState.cpp
@ -440,7 +440,7 @@ ID3D11RasterizerState* StateCache::Get(RasterizerState state)
    return it->second;
  D3D11_RASTERIZER_DESC rastdc = CD3D11_RASTERIZER_DESC(D3D11_FILL_SOLID, state.cull_mode, false, 0,
-                                                        0.f, 0, true, true, false, false);
+                                                        0.f, 0, false, true, false, false);
  ID3D11RasterizerState* res = nullptr;
--- a/Source/Core/VideoBackends/D3D/Render.cpp
+++ b/Source/Core/VideoBackends/D3D/Render.cpp
@ -578,14 +578,10 @@ void Renderer::SetViewport()
  Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X);
  Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y);
-  D3D11_VIEWPORT vp = CD3D11_VIEWPORT(
+  // We do depth clipping and depth range in the vertex shader instead of relying
-      X, Y, Wd, Ht,
+  // on the graphics API. However we still need to ensure depth values don't exceed
-      1.0f - MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f,
+  // the maximum value supported by the console GPU.
-      1.0f -
+  D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht, D3D11_MIN_DEPTH, GX_MAX_DEPTH);
          MathUtil::Clamp<float>(xfmem.viewport.farZ - MathUtil::Clamp<float>(xfmem.viewport.zRange,
                                                                              0.0f, 16777216.0f),
                                 0.0f, 16777215.0f) /
              16777216.0f);
  D3D::context->RSSetViewports(1, &vp);
 }
--- a/Source/Core/VideoBackends/D3D/main.cpp
+++ b/Source/Core/VideoBackends/D3D/main.cpp
@ -71,6 +71,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPostProcessing = false;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
  g_Config.backend_info.bSupportsDepthClamp = true;
  IDXGIFactory* factory;
  IDXGIAdapter* ad;
--- a/Source/Core/VideoBackends/D3D12/D3DState.cpp
+++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp
@ -329,7 +329,7 @@ D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state)
          0,
          0.f,
          0,
-          true,
+          false,
          true,
          false,
          0,
--- a/Source/Core/VideoBackends/D3D12/Render.cpp
+++ b/Source/Core/VideoBackends/D3D12/Render.cpp
@ -482,15 +482,10 @@ void Renderer::SetViewport()
  width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x);
  height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y);
-  D3D12_VIEWPORT vp = {
+  // We do depth clipping and depth range in the vertex shader instead of relying
-      x, y, width, height,
+  // on the graphics API. However we still need to ensure depth values don't exceed
-      1.0f - MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f,
+  // the maximum value supported by the console GPU.
-      1.0f -
+  D3D12_VIEWPORT vp = {x, y, width, height, D3D12_MIN_DEPTH, GX_MAX_DEPTH};
          MathUtil::Clamp<float>(xfmem.viewport.farZ - MathUtil::Clamp<float>(xfmem.viewport.zRange,
                                                                              0.0f, 16777216.0f),
                                 0.0f, 16777215.0f) /
              16777216.0f};
  D3D::current_command_list->RSSetViewports(1, &vp);
 }
--- a/Source/Core/VideoBackends/D3D12/main.cpp
+++ b/Source/Core/VideoBackends/D3D12/main.cpp
@ -74,6 +74,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPostProcessing = false;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
  g_Config.backend_info.bSupportsDepthClamp = true;
  IDXGIFactory* factory;
  IDXGIAdapter* ad;
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@ -481,6 +481,9 @@ Renderer::Renderer()
  g_Config.backend_info.bSupportsBindingLayout =
      GLExtensions::Supports("GL_ARB_shading_language_420pack");
  // Clip distance support is useless without a method to clamp the depth range
  g_Config.backend_info.bSupportsDepthClamp = GLExtensions::Supports("GL_ARB_depth_clamp");
  g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary");
  g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory");
  g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync");
@ -520,6 +523,10 @@ Renderer::Renderer()
    g_ogl_config.bSupportsGLSLCache = true;
    g_ogl_config.bSupportsGLSync = true;
    // TODO: Implement support for GL_EXT_clip_cull_distance when there is an extension for
    // depth clamping.
    g_Config.backend_info.bSupportsDepthClamp = false;
    if (strstr(g_ogl_config.glsl_version, "3.0"))
    {
      g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
@ -669,7 +676,7 @@ Renderer::Renderer()
                                   g_ogl_config.gl_renderer, g_ogl_config.gl_version),
                  5000);
-  WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s",
+  WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s%s",
           g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ",
           g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ",
           g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ",
@ -681,7 +688,8 @@ Renderer::Renderer()
           g_ActiveConfig.backend_info.bSupportsSSAA ? "" : "SSAA ",
           g_ActiveConfig.backend_info.bSupportsGSInstancing ? "" : "GSInstancing ",
           g_ActiveConfig.backend_info.bSupportsClipControl ? "" : "ClipControl ",
-           g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData ");
+           g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData ",
           g_ActiveConfig.backend_info.bSupportsDepthClamp ? "" : "DepthClamp ");
  s_last_multisamples = g_ActiveConfig.iMultisamples;
  s_MSAASamples = s_last_multisamples;
@ -724,6 +732,12 @@ Renderer::Renderer()
  glClearDepthf(1.0f);
  glEnable(GL_DEPTH_TEST);
  glDepthFunc(GL_LEQUAL);
  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  {
    glEnable(GL_CLIP_DISTANCE0);
    glEnable(GL_CLIP_DISTANCE1);
    glEnable(GL_DEPTH_CLAMP);
  }
  glPixelStorei(GL_UNPACK_ALIGNMENT, 4);  // 4-byte pixel alignment
@ -1142,6 +1156,14 @@ void Renderer::SetViewport()
    auto iceilf = [](float f) { return static_cast<GLint>(ceilf(f)); };
    glViewport(iceilf(X), iceilf(Y), iceilf(Width), iceilf(Height));
  }
  // Set the reversed depth range. If we do depth clipping and depth range in the
  // vertex shader we only need to ensure depth values don't exceed the maximum
  // value supported by the console GPU. If not, we simply clamp the near/far values
  // themselves to the maximum value as done above.
  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
    glDepthRangef(GX_MAX_DEPTH, 0.0f);
  else
    glDepthRangef(GLFar, GLNear);
 }
@ -1647,6 +1669,11 @@ void Renderer::ResetAPIState()
  glDisable(GL_BLEND);
  if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
    glDisable(GL_COLOR_LOGIC_OP);
  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  {
    glDisable(GL_CLIP_DISTANCE0);
    glDisable(GL_CLIP_DISTANCE1);
  }
  glDepthMask(GL_FALSE);
  glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
 }
@ -1655,6 +1682,11 @@ void Renderer::RestoreAPIState()
 {
  // Gets us back into a more game-like state.
  glEnable(GL_SCISSOR_TEST);
  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  {
    glEnable(GL_CLIP_DISTANCE0);
    glEnable(GL_CLIP_DISTANCE1);
  }
  SetGenerationMode();
  BPFunctions::SetScissor();
  SetColorMask();
--- a/Source/Core/VideoBackends/OGL/main.cpp
+++ b/Source/Core/VideoBackends/OGL/main.cpp
@ -108,6 +108,7 @@ void VideoBackend::InitBackendInfo()
  g_Config.backend_info.bSupportsPrimitiveRestart = true;
  g_Config.backend_info.bSupportsPaletteConversion = true;
  g_Config.backend_info.bSupportsClipControl = true;
  g_Config.backend_info.bSupportsDepthClamp = true;
  g_Config.backend_info.Adapters.clear();
--- a/Source/Core/VideoCommon/DriverDetails.cpp
+++ b/Source/Core/VideoCommon/DriverDetails.cpp
@ -74,6 +74,7 @@ static BugInfo m_known_bugs[] = {
    {OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE,
     101810.3907, 101810.3960, true},
    {OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
    {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENCLIPDISTANCE, -1.0, -1.0, true},
 };
 static std::map<Bug, BugInfo> m_bugs;
--- a/Source/Core/VideoCommon/DriverDetails.h
+++ b/Source/Core/VideoCommon/DriverDetails.h
@ -204,6 +204,15 @@ enum Bug
  // GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData
  // everywhere else.
  BUG_SLOWGETBUFFERSUBDATA,
  // Bug: Broken lines in geometry shaders when writing to gl_ClipDistance in the vertex shader
  // Affected Devices: Mesa i965
  // Started Version: -1
  // Ended Version: -1
  // Writing to gl_ClipDistance in both the vertex shader and the geometry shader will break
  // the geometry shader. Current workaround is to make sure the geometry shader always consumes
  // the gl_ClipDistance inputs from the vertex shader.
  BUG_BROKENCLIPDISTANCE,
 };
 // Initializes our internal vendor, device family, and driver version
--- a/Source/Core/VideoCommon/GeometryShaderGen.cpp
+++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp
@ -7,6 +7,7 @@
 #include "Common/CommonTypes.h"
 #include "VideoCommon/BPMemory.h"
 #include "VideoCommon/DriverDetails.h"
 #include "VideoCommon/GeometryShaderGen.h"
 #include "VideoCommon/LightingShaderGen.h"
 #include "VideoCommon/VideoCommon.h"
@ -211,6 +212,15 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid
  {
    out.Write("\tVS_OUTPUT f;\n");
    AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting);
    if (g_ActiveConfig.backend_info.bSupportsDepthClamp &&
        DriverDetails::HasBug(DriverDetails::BUG_BROKENCLIPDISTANCE))
    {
      // On certain GPUs we have to consume the clip distance from the vertex shader
      // or else the other vertex shader outputs will get corrupted.
      out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n");
      out.Write("\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n");
    }
  }
  else
  {
@ -312,6 +322,11 @@ static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data
  if (ApiType == APIType::OpenGL)
  {
    out.Write("\tgl_Position = %s.pos;\n", vertex);
    if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
    {
      out.Write("\tgl_ClipDistance[0] = %s.clipDist0;\n", vertex);
      out.Write("\tgl_ClipDistance[1] = %s.clipDist1;\n", vertex);
    }
    AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting);
  }
  else
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@ -86,6 +86,12 @@ unsigned int Renderer::efb_scale_numeratorY = 1;
 unsigned int Renderer::efb_scale_denominatorX = 1;
 unsigned int Renderer::efb_scale_denominatorY = 1;
 // The maximum depth that is written to the depth buffer should never exceed this value.
 // This is necessary because we use a 2^24 divisor for all our depth values to prevent
 // floating-point round-trip errors. However the console GPU doesn't ever write a value
 // to the depth buffer that exceeds 2^24 - 1.
 const float Renderer::GX_MAX_DEPTH = 16777215.0f / 16777216.0f;
 static float AspectToWidescreen(float aspect)
 {
  return aspect * ((16.0f / 9.0f) / (4.0f / 3.0f));
--- a/Source/Core/VideoCommon/RenderBase.h
+++ b/Source/Core/VideoCommon/RenderBase.h
@ -176,6 +176,8 @@ protected:
  static std::unique_ptr<PostProcessingShaderImplementation> m_post_processor;
  static const float GX_MAX_DEPTH;
 private:
  static PEControl::PixelFormat prev_efb_format;
  static unsigned int efb_scale_numeratorX;
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@ -196,6 +196,9 @@ inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens,
    DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD",
                       texgens + 2);
  }
  DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, "SV_ClipDistance", 0);
  DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, "SV_ClipDistance", 1);
 }
 template <class T>
@ -216,6 +219,9 @@ inline void AssignVSOutputMembers(T& object, const char* a, const char* b, u32 t
    object.Write("\t%s.Normal = %s.Normal;\n", a, b);
    object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b);
  }
  object.Write("\t%s.clipDist0 = %s.clipDist0;\n", a, b);
  object.Write("\t%s.clipDist1 = %s.clipDist1;\n", a, b);
 }
 // We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@ -399,28 +399,43 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
      out.Write("o.colors_1 = color1;\n");
  }
-  // write the true depth value, if the game uses depth textures pixel shaders will override with
+  // Write the true depth value. If the game uses depth textures, then the pixel shader will
-  // the correct values
+  // override it with the correct values; if not, then early z culling will improve speed.
-  // if not early z culling will improve speed
+  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  if (g_ActiveConfig.backend_info.bSupportsClipControl)
  {
    // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
    // our own depth clipping and calculate the depth range before the perspective divide.
    // Since we're adjusting z for the depth range before the perspective divide, we have to do our
    // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
    out.Write("o.clipDist0 = o.pos.z + o.pos.w;\n");  // Near: z < -w
    out.Write("o.clipDist1 = -o.pos.z;\n");           // Far: z > 0
    // Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
    // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
    // We have to handle the depth range in the vertex shader instead of after the perspective
    // divide, because some games will use a depth range larger than what is allowed by the
    // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
    // games effectively add a depth bias to the values written to the depth buffer.
    out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
              "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
  }
  else
  {
    // If we can't disable the incorrect depth clipping planes, then we need to rely on the
    // graphics API to handle the depth range after the perspective divide. This can result in
    // inaccurate depth values due to the missing depth bias, but that can at least be corrected
    // by overriding depth values in the pixel shader. We still need to take care of the reversed
    // depth though, so we do that here.
    out.Write("o.pos.z = -o.pos.z;\n");
  }
-  else  // OGL
+
  if (!g_ActiveConfig.backend_info.bSupportsClipControl)
  {
-    // this results in a scale from -1..0 to -1..1 after perspective
+    // If the graphics API doesn't support a depth range of 0..1, then we need to map z to
-    // divide
+    // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
-    out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");
+    // operation that can introduce a round-trip error.
-
+    out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
    // the next steps of the OGL pipeline are:
    // (x_c,y_c,z_c,w_c) = o.pos  //switch to OGL spec terminology
    // clipping to -w_c <= (x_c,y_c,z_c) <= w_c
    // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
    // z_w = (f-n)/2*z_d + (n+f)/2
    // z_w now contains the value to go to the 0..1 depth buffer
    // trying to get the correct semantic while not using glDepthRange
    // seems to get rather complicated
  }
  // The console GPU places the pixel center at 7/12 in screen space unless
@ -453,6 +468,11 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
      out.Write("colors_1 = o.colors_1;\n");
    }
    if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
    {
      out.Write("gl_ClipDistance[0] = o.clipDist0;\n");
      out.Write("gl_ClipDistance[1] = o.clipDist1;\n");
    }
    out.Write("gl_Position = o.pos;\n");
  }
  else  // D3D
--- a/Source/Core/VideoCommon/VertexShaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexShaderManager.cpp
@ -386,6 +386,15 @@ void VertexShaderManager::SetConstants()
    const float pixel_size_y = 2.f / Renderer::EFBToScaledXf(2.f * xfmem.viewport.ht);
    constants.pixelcentercorrection[0] = pixel_center_correction * pixel_size_x;
    constants.pixelcentercorrection[1] = pixel_center_correction * pixel_size_y;
    // The depth range is handled in the vertex shader. We need to reverse
    // the far value to get a reversed depth range mapping. This is necessary
    // because the standard depth range equation pushes all depth values towards
    // the back of the depth buffer where conventionally depth buffers have the
    // least precision.
    constants.pixelcentercorrection[2] = xfmem.viewport.zRange / 16777215.0f;
    constants.pixelcentercorrection[3] = 1.0f - xfmem.viewport.farZ / 16777215.0f;
    dirty = true;
    // This is so implementation-dependent that we can't have it here.
    g_renderer->SetViewport();
--- a/Source/Core/VideoCommon/VideoConfig.h
+++ b/Source/Core/VideoCommon/VideoConfig.h
@ -171,6 +171,7 @@ struct VideoConfig final
    bool bSupportsPaletteConversion;
    bool bSupportsClipControl;  // Needed by VertexShaderGen, so must stay in VideoCommon
    bool bSupportsSSAA;
    bool bSupportsDepthClamp;  // Needed by VertexShaderGen, so must stay in VideoCommon
  } backend_info;
  // Utility
 ,
 .f,
 ,
-          true,
+          false,
           true,
           false,
 ,