From f9c829c7f711efadea1db7aabb51c703380fa03a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 25 Feb 2018 17:56:09 +1000 Subject: [PATCH] OGL: Re-implement async shader compiling --- Source/Core/VideoBackends/D3D/main.cpp | 1 + .../Core/VideoBackends/Null/NullBackend.cpp | 1 + Source/Core/VideoBackends/OGL/OGLPipeline.cpp | 9 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 100 +++++++++++++++--- .../VideoBackends/OGL/ProgramShaderCache.h | 16 ++- Source/Core/VideoBackends/OGL/Render.cpp | 13 ++- Source/Core/VideoBackends/OGL/Render.h | 2 + Source/Core/VideoBackends/Software/SWmain.cpp | 1 + .../VideoBackends/Vulkan/VulkanContext.cpp | 1 + Source/Core/VideoCommon/DriverDetails.cpp | 2 - Source/Core/VideoCommon/DriverDetails.h | 6 +- Source/Core/VideoCommon/RenderBase.cpp | 5 + Source/Core/VideoCommon/RenderBase.h | 3 + Source/Core/VideoCommon/ShaderCache.cpp | 2 +- Source/Core/VideoCommon/VideoConfig.cpp | 6 +- Source/Core/VideoCommon/VideoConfig.h | 1 + 16 files changed, 136 insertions(+), 33 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 3c3237402c..fb8c18ae5e 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsFramebufferFetch = false; + g_Config.backend_info.bSupportsBackgroundCompiling = true; IDXGIFactory2* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 6e260f08d5..430da35f84 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; 
g_Config.backend_info.bSupportsFramebufferFetch = false; + g_Config.backend_info.bSupportsBackgroundCompiling = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLPipeline.cpp b/Source/Core/VideoBackends/OGL/OGLPipeline.cpp index 9d93381d46..bd439898db 100644 --- a/Source/Core/VideoBackends/OGL/OGLPipeline.cpp +++ b/Source/Core/VideoBackends/OGL/OGLPipeline.cpp @@ -46,10 +46,11 @@ OGLPipeline::~OGLPipeline() std::unique_ptr OGLPipeline::Create(const AbstractPipelineConfig& config) { - const PipelineProgram* program = - ProgramShaderCache::GetPipelineProgram(static_cast(config.vertex_shader), - static_cast(config.geometry_shader), - static_cast(config.pixel_shader)); + const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram( + static_cast(config.vertex_format), + static_cast(config.vertex_shader), + static_cast(config.geometry_shader), + static_cast(config.pixel_shader)); if (!program) return nullptr; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index d953996396..60b7da82b9 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -51,9 +51,10 @@ static std::unique_ptr s_buffer; static int num_failures = 0; static GLuint CurrentProgram = 0; -ProgramShaderCache::PipelineProgramMap ProgramShaderCache::pipelineprograms; -std::mutex ProgramShaderCache::pipelineprogramlock; +ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs; +std::mutex ProgramShaderCache::s_pipeline_program_lock; static std::string s_glsl_header = ""; +static thread_local bool s_is_shared_context = false; static std::string GetGLSLVersionString() { @@ -506,8 +507,8 @@ void ProgramShaderCache::Shutdown() s_last_VAO = 0; // All pipeline programs should have been released. 
- _dbg_assert_(VIDEO, pipelineprograms.empty()); - pipelineprograms.clear(); + _dbg_assert_(VIDEO, s_pipeline_programs.empty()); + s_pipeline_programs.clear(); } void ProgramShaderCache::CreateAttributelessVAO() @@ -548,21 +549,28 @@ void ProgramShaderCache::InvalidateLastProgram() CurrentProgram = 0; } -const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* vertex_shader, +const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format, + const OGLShader* vertex_shader, const OGLShader* geometry_shader, const OGLShader* pixel_shader) { PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader}; { - std::lock_guard guard(pipelineprogramlock); - auto iter = pipelineprograms.find(key); - if (iter != pipelineprograms.end()) + std::lock_guard guard(s_pipeline_program_lock); + auto iter = s_pipeline_programs.find(key); + if (iter != s_pipeline_programs.end()) { iter->second->reference_count++; return iter->second.get(); } } + // We temporarily change the vertex array to the pipeline's vertex format. + // This can prevent the NVIDIA OpenGL driver from recompiling on first use. + GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO; + if (s_is_shared_context || vao != s_last_VAO) + glBindVertexArray(vao); + std::unique_ptr prog = std::make_unique(); prog->key = key; @@ -581,6 +589,11 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v // Link program. prog->shader.SetProgramBindings(false); glLinkProgram(prog->shader.glprogid); + + // Restore VAO binding after linking. + if (!s_is_shared_context && vao != s_last_VAO) + glBindVertexArray(s_last_VAO); + if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {})) { prog->shader.Destroy(); @@ -588,9 +601,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v } // Lock to insert. A duplicate program may have been created in the meantime. 
- std::lock_guard guard(pipelineprogramlock); - auto iter = pipelineprograms.find(key); - if (iter != pipelineprograms.end()) + std::lock_guard guard(s_pipeline_program_lock); + auto iter = s_pipeline_programs.find(key); + if (iter != s_pipeline_programs.end()) { // Destroy this program, and use the one which was created first. prog->shader.Destroy(); @@ -601,19 +614,25 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v // Set program variables on the shader which will be returned. // This is only needed for drivers which don't support binding layout. prog->shader.SetProgramVariables(); - auto ip = pipelineprograms.emplace(key, std::move(prog)); + + // If this is a shared context, ensure we sync before we return the program to + // the main thread. If we don't do this, some drivers can lock up (e.g. AMD). + if (s_is_shared_context) + glFinish(); + + auto ip = s_pipeline_programs.emplace(key, std::move(prog)); return ip.first->second.get(); } void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog) { - auto iter = pipelineprograms.find(prog->key); - _assert_(iter != pipelineprograms.end() && prog == iter->second.get()); + auto iter = s_pipeline_programs.find(prog->key); + _assert_(iter != s_pipeline_programs.end() && prog == iter->second.get()); if (--iter->second->reference_count == 0) { iter->second->shader.Destroy(); - pipelineprograms.erase(iter); + s_pipeline_programs.erase(iter); } } @@ -783,4 +802,55 @@ void ProgramShaderCache::CreateHeader() v > GlslEs300 ? "precision highp sampler2DMS;" : "", v >= GlslEs310 ? 
"precision highp image2DArray;" : ""); } + +bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param) +{ + std::unique_ptr context = GLInterface->CreateSharedContext(); + if (!context) + { + PanicAlert("Failed to create shared context for shader compiling."); + return false; + } + + *param = context.release(); + return true; +} + +bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param) +{ + cInterfaceBase* context = static_cast(param); + if (!context->MakeCurrent()) + return false; + + s_is_shared_context = true; + if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart) + { + if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) + { + glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); + } + else + { + if (GLExtensions::Version() >= 310) + { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(65535); + } + else + { + glEnableClientState(GL_PRIMITIVE_RESTART_NV); + glPrimitiveRestartIndexNV(65535); + } + } + } + + return true; +} + +void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param) +{ + cInterfaceBase* context = static_cast(param); + context->ClearCurrent(); + delete context; +} } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index af2aa10383..0096c6e5b9 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -11,6 +11,7 @@ #include #include "Common/GL/GLUtil.h" +#include "VideoCommon/AsyncShaderCompiler.h" namespace OGL { @@ -87,7 +88,8 @@ public: static void Shutdown(); static void CreateHeader(); - static const PipelineProgram* GetPipelineProgram(const OGLShader* vertex_shader, + static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format, + const OGLShader* vertex_shader, const OGLShader* geometry_shader, const OGLShader* pixel_shader); static void ReleasePipelineProgram(const PipelineProgram* prog); @@ 
-99,8 +101,8 @@ private: static void CreateAttributelessVAO(); - static PipelineProgramMap pipelineprograms; - static std::mutex pipelineprogramlock; + static PipelineProgramMap s_pipeline_programs; + static std::mutex s_pipeline_program_lock; static u32 s_ubo_buffer_size; static s32 s_ubo_align; @@ -110,4 +112,12 @@ private: static GLuint s_last_VAO; }; +class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler +{ +protected: + bool WorkerThreadInitMainThread(void** param) override; + bool WorkerThreadInitWorkerThread(void* param) override; + void WorkerThreadExit(void* param) override; +}; + } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index ac08ae57cd..5319e41cbd 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -81,8 +81,8 @@ static bool s_efbCacheIsCleared = false; static std::vector s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor -static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, - GLsizei length, const char* message, const void* userParam) +void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, + const char* message, const void* userParam) { const char* s_source; const char* s_type; @@ -677,6 +677,10 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPaletteConversion && g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore; + // Background compiling is supported only when shared contexts aren't broken. 
+ g_Config.backend_info.bSupportsBackgroundCompiling = + !DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION); + if (g_ogl_config.bSupportsDebug) { if (GLExtensions::Supports("GL_KHR_debug")) @@ -1695,4 +1699,9 @@ void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* u glDispatchCompute(groups_x, groups_y, groups_z); ProgramShaderCache::InvalidateLastProgram(); } + +std::unique_ptr Renderer::CreateAsyncShaderCompiler() +{ + return std::make_unique(); +} } diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index a9124ad4fb..3f1277a0c2 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -139,6 +139,8 @@ public: void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z) override; + std::unique_ptr CreateAsyncShaderCompiler() override; + private: void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const void* data); diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 504c008511..8765a28e4c 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCopyToVram = false; g_Config.backend_info.bSupportsFramebufferFetch = false; + g_Config.backend_info.bSupportsBackgroundCompiling = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 77a7a8ffe1..f9c254a04d 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -235,6 +235,7 @@ void 
VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsBitfield = true; // Assumed support. config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support. config->backend_info.bSupportsPostProcessing = true; // Assumed support. + config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support. config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features. diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index d280a6a9b4..8fd990395d 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -102,8 +102,6 @@ static BugInfo m_known_bugs[] = { true}, {API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}, - {API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, - BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}, {API_VULKAN, OS_ALL, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKEN_MSAA_CLEAR, -1.0, -1.0, true}, {API_VULKAN, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index df06a77d92..bc96ee2aab 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -252,8 +252,10 @@ enum Bug // the negated value to a temporary variable then using that in the bitwise op. BUG_BROKEN_BITWISE_OP_NEGATION, - // Bug: Shaders are recompiled on the main thread after being previously compiled on - // a worker thread on Mesa i965. + // BUG: The GPU shader code appears to be context-specific on Mesa/i965. 
+ // This means that if we compiled the ubershaders asynchronously, they will be recompiled + // on the main thread the first time they are used, causing stutter. For now, disable + // asynchronous compilation on Mesa i965. // Started version: -1 // Ended Version: -1 BUG_SHARED_CONTEXT_SHADER_COMPILATION, diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 4c03168d20..a5e5272651 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -1027,3 +1027,8 @@ bool Renderer::UseVertexDepthRange() const // in the vertex shader. return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f; } + +std::unique_ptr Renderer::CreateAsyncShaderCompiler() +{ + return std::make_unique(); +} diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index da8e0b819f..b906cd2558 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -28,6 +28,7 @@ #include "Common/Flag.h" #include "Common/MathUtil.h" #include "VideoCommon/AVIDump.h" +#include "VideoCommon/AsyncShaderCompiler.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/FPSCounter.h" #include "VideoCommon/RenderState.h" @@ -189,6 +190,8 @@ public: void ResizeSurface(int new_width, int new_height); bool UseVertexDepthRange() const; + virtual std::unique_ptr CreateAsyncShaderCompiler(); + virtual void Shutdown(); // Drawing utility shaders. diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 05624e9fe4..1e4a0ef741 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -27,7 +27,7 @@ bool ShaderCache::Initialize() m_efb_multisamples = g_ActiveConfig.iMultisamples; // Create the async compiler, and start the worker threads. 
- m_async_shader_compiler = std::make_unique(); + m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler(); m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); // Load shader and UID caches. diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 771adf60bf..d8c0dfaa6c 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -187,8 +187,7 @@ static u32 GetNumAutoShaderCompilerThreads() u32 VideoConfig::GetShaderCompilerThreads() const { - // videocommon shader cache is currently broken on OGL, needs multiple contexts. - if (backend_info.api_type == APIType::OpenGL) + if (!backend_info.bSupportsBackgroundCompiling) return 0; if (iShaderCompilerThreads >= 0) @@ -199,8 +198,7 @@ u32 VideoConfig::GetShaderCompilerThreads() const u32 VideoConfig::GetShaderPrecompilerThreads() const { - // videocommon shader cache is currently broken on OGL, needs multiple contexts. - if (backend_info.api_type == APIType::OpenGL) + if (!backend_info.bSupportsBackgroundCompiling) return 0; if (iShaderPrecompilerThreads >= 0) diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index fb05408303..7839cb0e8f 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -227,6 +227,7 @@ struct VideoConfig final bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon bool bSupportsBPTCTextures; bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES + bool bSupportsBackgroundCompiling; } backend_info; // Utility