diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index e277c74805..e82f103586 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -440,7 +440,7 @@ ID3D11RasterizerState* StateCache::Get(RasterizerState state) return it->second; D3D11_RASTERIZER_DESC rastdc = CD3D11_RASTERIZER_DESC(D3D11_FILL_SOLID, state.cull_mode, false, 0, - 0.f, 0, true, true, false, false); + 0.f, 0, false, true, false, false); ID3D11RasterizerState* res = nullptr; diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index d7babc9ff7..a6301e53a2 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -578,14 +578,10 @@ void Renderer::SetViewport() Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X); Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y); - D3D11_VIEWPORT vp = CD3D11_VIEWPORT( - X, Y, Wd, Ht, - 1.0f - MathUtil::Clamp(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f, - 1.0f - - MathUtil::Clamp(xfmem.viewport.farZ - MathUtil::Clamp(xfmem.viewport.zRange, - 0.0f, 16777216.0f), - 0.0f, 16777215.0f) / - 16777216.0f); + // We do depth clipping and depth range in the vertex shader instead of relying + // on the graphics API. However we still need to ensure depth values don't exceed + // the maximum value supported by the console GPU. 
+ D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht, D3D11_MIN_DEPTH, GX_MAX_DEPTH); D3D::context->RSSetViewports(1, &vp); } diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index e471e3715a..2471135afd 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -71,6 +71,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; + g_Config.backend_info.bSupportsDepthClamp = true; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/D3D12/D3DState.cpp b/Source/Core/VideoBackends/D3D12/D3DState.cpp index 2902f3272f..36e997d895 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp @@ -329,7 +329,7 @@ D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state) 0, 0.f, 0, - true, + false, true, false, 0, diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index 6124534204..912cd4908d 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -482,15 +482,10 @@ void Renderer::SetViewport() width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x); height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y); - D3D12_VIEWPORT vp = { - x, y, width, height, - 1.0f - MathUtil::Clamp(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f, - 1.0f - - MathUtil::Clamp(xfmem.viewport.farZ - MathUtil::Clamp(xfmem.viewport.zRange, - 0.0f, 16777216.0f), - 0.0f, 16777215.0f) / - 16777216.0f}; - + // We do depth clipping and depth range in the vertex shader instead of relying + // on the graphics API. However we still need to ensure depth values don't exceed + // the maximum value supported by the console GPU. 
+ D3D12_VIEWPORT vp = {x, y, width, height, D3D12_MIN_DEPTH, GX_MAX_DEPTH}; D3D::current_command_list->RSSetViewports(1, &vp); } diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index 69a53603f1..f6e5512da1 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -74,6 +74,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; + g_Config.backend_info.bSupportsDepthClamp = true; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index bde616deac..d637c308ca 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -481,6 +481,9 @@ Renderer::Renderer() g_Config.backend_info.bSupportsBindingLayout = GLExtensions::Supports("GL_ARB_shading_language_420pack"); + // Clip distance support is useless without a method to clamp the depth range + g_Config.backend_info.bSupportsDepthClamp = GLExtensions::Supports("GL_ARB_depth_clamp"); + g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary"); g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory"); g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync"); @@ -520,6 +523,10 @@ Renderer::Renderer() g_ogl_config.bSupportsGLSLCache = true; g_ogl_config.bSupportsGLSync = true; + // TODO: Implement support for GL_EXT_clip_cull_distance when there is an extension for + // depth clamping. 
+ g_Config.backend_info.bSupportsDepthClamp = false; + if (strstr(g_ogl_config.glsl_version, "3.0")) { g_ogl_config.eSupportedGLSLVersion = GLSLES_300; @@ -669,7 +676,7 @@ Renderer::Renderer() g_ogl_config.gl_renderer, g_ogl_config.gl_version), 5000); - WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s", + WARN_LOG(VIDEO, "Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s%s%s%s%s", g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ", g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ", g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ", @@ -681,7 +688,8 @@ Renderer::Renderer() g_ActiveConfig.backend_info.bSupportsSSAA ? "" : "SSAA ", g_ActiveConfig.backend_info.bSupportsGSInstancing ? "" : "GSInstancing ", g_ActiveConfig.backend_info.bSupportsClipControl ? "" : "ClipControl ", - g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData "); + g_ogl_config.bSupportsCopySubImage ? "" : "CopyImageSubData ", + g_ActiveConfig.backend_info.bSupportsDepthClamp ? "" : "DepthClamp "); s_last_multisamples = g_ActiveConfig.iMultisamples; s_MSAASamples = s_last_multisamples; @@ -724,6 +732,12 @@ Renderer::Renderer() glClearDepthf(1.0f); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LEQUAL); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + glEnable(GL_DEPTH_CLAMP); + } glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment @@ -1142,7 +1156,15 @@ void Renderer::SetViewport() auto iceilf = [](float f) { return static_cast(ceilf(f)); }; glViewport(iceilf(X), iceilf(Y), iceilf(Width), iceilf(Height)); } - glDepthRangef(GLFar, GLNear); + + // Set the reversed depth range. If we do depth clipping and depth range in the + // vertex shader we only need to ensure depth values don't exceed the maximum + // value supported by the console GPU. 
If not, we simply clamp the near/far values + // themselves to the maximum value as done above. + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + glDepthRangef(GX_MAX_DEPTH, 0.0f); + else + glDepthRangef(GLFar, GLNear); } void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, @@ -1647,6 +1669,11 @@ void Renderer::ResetAPIState() glDisable(GL_BLEND); if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL) glDisable(GL_COLOR_LOGIC_OP); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glDisable(GL_CLIP_DISTANCE0); + glDisable(GL_CLIP_DISTANCE1); + } glDepthMask(GL_FALSE); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } @@ -1655,6 +1682,11 @@ void Renderer::RestoreAPIState() { // Gets us back into a more game-like state. glEnable(GL_SCISSOR_TEST); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + } SetGenerationMode(); BPFunctions::SetScissor(); SetColorMask(); diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 5f667efb1d..d27b763a88 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -108,6 +108,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; + g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index f65f0afa3e..6da311c4e2 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -74,6 +74,7 @@ static BugInfo m_known_bugs[] = { {OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true}, {OS_ALL, VENDOR_ATI, DRIVER_ATI, 
Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true}, + {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENCLIPDISTANCE, -1.0, -1.0, true}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 9c705694c6..b6dcf87963 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -204,6 +204,15 @@ enum Bug // GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData // everywhere else. BUG_SLOWGETBUFFERSUBDATA, + + // Bug: Broken lines in geometry shaders when writing to gl_ClipDistance in the vertex shader + // Affected Devices: Mesa i965 + // Started Version: -1 + // Ended Version: -1 + // Writing to gl_ClipDistance in both the vertex shader and the geometry shader will break + // the geometry shader. Current workaround is to make sure the geometry shader always consumes + // the gl_ClipDistance inputs from the vertex shader. 
+ BUG_BROKENCLIPDISTANCE, }; // Initializes our internal vendor, device family, and driver version diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 4d90e6f10d..83b30943bc 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -7,6 +7,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/VideoCommon.h" @@ -211,6 +212,15 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid { out.Write("\tVS_OUTPUT f;\n"); AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting); + + if (g_ActiveConfig.backend_info.bSupportsDepthClamp && + DriverDetails::HasBug(DriverDetails::BUG_BROKENCLIPDISTANCE)) + { + // On certain GPUs we have to consume the clip distance from the vertex shader + // or else the other vertex shader outputs will get corrupted. 
+ out.Write("\tf.clipDist0 = gl_in[i].gl_ClipDistance[0];\n"); + out.Write("\tf.clipDist1 = gl_in[i].gl_ClipDistance[1];\n"); + } } else { @@ -312,6 +322,11 @@ static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data if (ApiType == APIType::OpenGL) { out.Write("\tgl_Position = %s.pos;\n", vertex); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + out.Write("\tgl_ClipDistance[0] = %s.clipDist0;\n", vertex); + out.Write("\tgl_ClipDistance[1] = %s.clipDist1;\n", vertex); + } AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting); } else diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index fade0d00b9..3b50cd2313 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -86,6 +86,12 @@ unsigned int Renderer::efb_scale_numeratorY = 1; unsigned int Renderer::efb_scale_denominatorX = 1; unsigned int Renderer::efb_scale_denominatorY = 1; +// The maximum depth that is written to the depth buffer should never exceed this value. +// This is necessary because we use a 2^24 divisor for all our depth values to prevent +// floating-point round-trip errors. However the console GPU doesn't ever write a value +// to the depth buffer that exceeds 2^24 - 1. 
+const float Renderer::GX_MAX_DEPTH = 16777215.0f / 16777216.0f; + static float AspectToWidescreen(float aspect) { return aspect * ((16.0f / 9.0f) / (4.0f / 3.0f)); diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 81554f5e78..1ae3121637 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -176,6 +176,8 @@ protected: static std::unique_ptr m_post_processor; + static const float GX_MAX_DEPTH; + private: static PEControl::PixelFormat prev_efb_format; static unsigned int efb_scale_numeratorX; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index e7d0667861..97c63ae2ea 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -196,6 +196,9 @@ inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens, DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD", texgens + 2); } + + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, "SV_ClipDistance", 0); + DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, "SV_ClipDistance", 1); } template @@ -216,6 +219,9 @@ inline void AssignVSOutputMembers(T& object, const char* a, const char* b, u32 t object.Write("\t%s.Normal = %s.Normal;\n", a, b); object.Write("\t%s.WorldPos = %s.WorldPos;\n", a, b); } + + object.Write("\t%s.clipDist0 = %s.clipDist0;\n", a, b); + object.Write("\t%s.clipDist1 = %s.clipDist1;\n", a, b); } // We use the flag "centroid" to fix some MSAA rendering bugs. 
With MSAA, the diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index aa69550bd8..a42f090716 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -399,28 +399,43 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da out.Write("o.colors_1 = color1;\n"); } - // write the true depth value, if the game uses depth textures pixel shaders will override with - // the correct values - // if not early z culling will improve speed - if (g_ActiveConfig.backend_info.bSupportsClipControl) + // Write the true depth value. If the game uses depth textures, then the pixel shader will + // override it with the correct values; if not, then early z culling will improve speed. + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) { + // If we can disable the incorrect depth clipping planes using depth clamping, then we can do + // our own depth clipping and calculate the depth range before the perspective divide. + + // Since we're adjusting z for the depth range before the perspective divide, we have to do our + // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. + out.Write("o.clipDist0 = o.pos.z + o.pos.w;\n"); // Near: z < -w + out.Write("o.clipDist1 = -o.pos.z;\n"); // Far: z > 0 + + // Adjust z for the depth range. We're using an equation which incorporates a depth inversion, + // so we can map the console -1..0 range to the 0..1 range used in the depth buffer. + // We have to handle the depth range in the vertex shader instead of after the perspective + // divide, because some games will use a depth range larger than what is allowed by the + // graphics API. These large depth ranges will still be clipped to the 0..1 range, so these + // games effectively add a depth bias to the values written to the depth buffer. 
+ out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - " + "o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n"); + } + else + { + // If we can't disable the incorrect depth clipping planes, then we need to rely on the + // graphics API to handle the depth range after the perspective divide. This can result in + // inaccurate depth values due to the missing depth bias, but that can at least be corrected by + // overriding depth values in the pixel shader. We still need to take care of the reversed depth + // though, so we do that here. out.Write("o.pos.z = -o.pos.z;\n"); } - else // OGL + + if (!g_ActiveConfig.backend_info.bSupportsClipControl) { - // this results in a scale from -1..0 to -1..1 after perspective - // divide - out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n"); - - // the next steps of the OGL pipeline are: - // (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology - // clipping to -w_c <= (x_c,y_c,z_c) <= w_c - // (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide - // z_w = (f-n)/2*z_d + (n+f)/2 - // z_w now contains the value to go to the 0..1 depth buffer - - // trying to get the correct semantic while not using glDepthRange - // seems to get rather complicated + // If the graphics API doesn't support a depth range of 0..1, then we need to map z to + // the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point + // operation that can introduce a round-trip error. 
+ out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n"); } // The console GPU places the pixel center at 7/12 in screen space unless @@ -453,6 +468,11 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da out.Write("colors_1 = o.colors_1;\n"); } + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + out.Write("gl_ClipDistance[0] = o.clipDist0;\n"); + out.Write("gl_ClipDistance[1] = o.clipDist1;\n"); + } out.Write("gl_Position = o.pos;\n"); } else // D3D diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 19fb1f371d..9b1c3437fe 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -386,6 +386,15 @@ void VertexShaderManager::SetConstants() const float pixel_size_y = 2.f / Renderer::EFBToScaledXf(2.f * xfmem.viewport.ht); constants.pixelcentercorrection[0] = pixel_center_correction * pixel_size_x; constants.pixelcentercorrection[1] = pixel_center_correction * pixel_size_y; + + // The depth range is handled in the vertex shader. We need to reverse + // the far value to get a reversed depth range mapping. This is necessary + // because the standard depth range equation pushes all depth values towards + // the back of the depth buffer where conventionally depth buffers have the + // least precision. + constants.pixelcentercorrection[2] = xfmem.viewport.zRange / 16777215.0f; + constants.pixelcentercorrection[3] = 1.0f - xfmem.viewport.farZ / 16777215.0f; + dirty = true; // This is so implementation-dependent that we can't have it here. 
g_renderer->SetViewport(); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 92be4726ef..3a1f984e87 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -171,6 +171,7 @@ struct VideoConfig final bool bSupportsPaletteConversion; bool bSupportsClipControl; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsSSAA; + bool bSupportsDepthClamp; // Needed by VertexShaderGen, so must stay in VideoCommon } backend_info; // Utility