diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 60a5b4bc99..67f96e2380 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -554,6 +554,22 @@ void ProgramShaderCache::CreateHeader() break; } + const char* earlyz_string = ""; + if (!is_glsles && g_ActiveConfig.backend_info.bSupportsEarlyZ) + { + if (g_ogl_config.bSupportsEarlyFragmentTests) + { + earlyz_string = "#extension GL_ARB_shader_image_load_store : enable\n" + "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n"; + } + else if(g_ogl_config.bSupportsConservativeDepth) + { + // See PixelShaderGen for details about this fallback. + earlyz_string = "#extension GL_ARB_conservative_depth : enable\n" + "#define FORCE_EARLY_Z layout(depth_unchanged) out float gl_FragDepth\n"; + } + } + snprintf(s_glsl_header, sizeof(s_glsl_header), "%s\n" "%s\n" // ubo @@ -594,7 +610,7 @@ void ProgramShaderCache::CreateHeader() , GetGLSLVersionString().c_str() , v < GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "" - , !is_glsles && g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : "" + , earlyz_string , (g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GLSLES_310) ? "#extension GL_ARB_shading_language_420pack : enable" : "" , (g_ogl_config.bSupportsMSAA && v < GLSL_150) ? "#extension GL_ARB_texture_multisample : enable" : "" , g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
"#define SAMPLER_BINDING(x) layout(binding = x)" : "#define SAMPLER_BINDING(x)" diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 12f98691b3..f59f527354 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -434,7 +434,6 @@ Renderer::Renderer() GLExtensions::Supports("GL_EXT_blend_func_extended"); g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVERESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsEarlyZ = GLExtensions::Supports("GL_ARB_shader_image_load_store"); g_Config.backend_info.bSupportsBBox = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && GLExtensions::Supports("GL_ARB_sample_shading"); @@ -469,6 +468,8 @@ Renderer::Renderer() g_ogl_config.bSupports3DTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage_multisample") || GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array"); g_ogl_config.bSupports2DTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage_multisample"); + g_ogl_config.bSupportsEarlyFragmentTests = GLExtensions::Supports("GL_ARB_shader_image_load_store"); + g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth"); if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) { @@ -488,7 +489,7 @@ Renderer::Renderer() g_ogl_config.eSupportedGLSLVersion = GLSLES_310; g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a"); g_Config.backend_info.bSupportsBindingLayout = true; - g_Config.backend_info.bSupportsEarlyZ = true; + g_ogl_config.bSupportsEarlyFragmentTests = true; g_Config.backend_info.bSupportsGeometryShaders = 
g_ogl_config.bSupportsAEP; g_Config.backend_info.bSupportsGSInstancing = g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; @@ -507,7 +508,7 @@ Renderer::Renderer() g_ogl_config.eSupportedGLSLVersion = GLSLES_320; g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a"); g_Config.backend_info.bSupportsBindingLayout = true; - g_Config.backend_info.bSupportsEarlyZ = true; + g_ogl_config.bSupportsEarlyFragmentTests = true; g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsPaletteConversion = true; @@ -533,13 +534,15 @@ Renderer::Renderer() else if (strstr(g_ogl_config.glsl_version, "1.30")) { g_ogl_config.eSupportedGLSLVersion = GLSL_130; - g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+ + g_ogl_config.bSupportsEarlyFragmentTests = false; // layout keyword is only supported on glsl150+ + g_ogl_config.bSupportsConservativeDepth = false; // layout keyword is only supported on glsl150+ g_Config.backend_info.bSupportsGeometryShaders = false; // geometry shaders are only supported on glsl150+ } else if (strstr(g_ogl_config.glsl_version, "1.40")) { g_ogl_config.eSupportedGLSLVersion = GLSL_140; - g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+ + g_ogl_config.bSupportsEarlyFragmentTests = false; // layout keyword is only supported on glsl150+ + g_ogl_config.bSupportsConservativeDepth = false; // layout keyword is only supported on glsl150+ g_Config.backend_info.bSupportsGeometryShaders = false; // geometry shaders are only supported on glsl150+ } else if (strstr(g_ogl_config.glsl_version, "1.50")) @@ -560,6 +563,9 @@ Renderer::Renderer() g_ogl_config.bSupportsAEP = false; } + // Either method can do early-z tests. 
See PixelShaderGen for details. + g_Config.backend_info.bSupportsEarlyZ = g_ogl_config.bSupportsEarlyFragmentTests || g_ogl_config.bSupportsConservativeDepth; + if (g_ogl_config.bSupportsDebug) { if (GLExtensions::Supports("GL_KHR_debug")) diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 3b9c70ae61..a99438533b 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -50,6 +50,8 @@ struct VideoConfig ES_TEXBUF_TYPE SupportedESTextureBuffer; bool bSupports2DTextureStorage; bool bSupports3DTextureStorage; + bool bSupportsEarlyFragmentTests; + bool bSupportsConservativeDepth; const char* gl_vendor; const char* gl_renderer; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 8298d355f6..628d6343c8 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -306,14 +306,30 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Most importantly, it was not possible to write to the depth buffer without also writing // a color value (unless color writing was disabled altogether). - // OpenGL has a flag which allows the driver to still update the depth buffer if alpha - // test fails. The driver isn't required to do this, but I (degasus) assume all of them do - // because it's the much faster code path for the GPU. + // OpenGL 4.2 actually provides two extensions which can force an early z test: + // * ARB_shader_image_load_store has 'layout(early_fragment_tests)' which forces the driver to do z and stencil tests early. + // * ARB_conservative_depth has 'layout(depth_unchanged)' which signals to the driver that it can make optimisations + // which assume the pixel shader won't update the depth buffer. 
+ + // early_fragment_tests is the best option, as it requires the driver to do early-z and defines early-z exactly as + // we expect, with discard causing the shader to exit with only the depth buffer updated. + + // Conservative depth's 'depth_unchanged' only hints to the driver that an early-z optimisation can be made and + // doesn't define what will happen if we discard the fragment. But the way modern graphics hardware is implemented + // means it is not unreasonable to expect the same behaviour as early_fragment_tests. + // We can also assume that if a driver has gone out of its way to support conservative depth and not image_load_store + // as required by OpenGL 4.2 that it will be doing the optimisation. + // If the driver doesn't actually do an early z optimisation, ZCompLoc will be broken and depth will only be written + // if the alpha test passes. + + // We support conservative depth as a fallback, because many drivers based on Mesa haven't implemented all of the + // ARB_shader_image_load_store extension yet. // D3D11 also has a way to force the driver to enable early-z, so we're fine here. if(ApiType == API_OPENGL) { - out.Write("layout(early_fragment_tests) in;\n"); + // This is a #define which expands to whatever early-z method the driver supports. + out.Write("FORCE_EARLY_Z; \n"); } else {