From c8f31656cb1c5f5e3c83a5b977fd822e987cb596 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 27 Jul 2017 13:15:38 +1000 Subject: [PATCH] VideoBackends: Support a different number of threads for precompiling At runtime, we only really want a single shader compiler thread. However, for initial boots, we can use a higher number to speed things up. --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 ++ Source/Core/Core/Config/GraphicsSettings.h | 1 + .../Core/ConfigLoaders/IsSettingSaveable.cpp | 1 + .../VideoBackends/D3D/PixelShaderCache.cpp | 6 ++--- .../Core/VideoBackends/D3D/PixelShaderCache.h | 2 +- .../VideoBackends/D3D/VertexShaderCache.cpp | 17 ++++++++++---- .../VideoBackends/D3D/VertexShaderCache.h | 3 ++- Source/Core/VideoBackends/D3D/main.cpp | 1 + .../VideoBackends/OGL/ProgramShaderCache.cpp | 23 +++++++++++-------- .../Core/VideoBackends/Vulkan/ShaderCache.cpp | 9 +++++--- .../Core/VideoCommon/AsyncShaderCompiler.cpp | 20 +++++++++++++++- Source/Core/VideoCommon/AsyncShaderCompiler.h | 3 ++- Source/Core/VideoCommon/VideoConfig.cpp | 18 +++++++++++++-- Source/Core/VideoCommon/VideoConfig.h | 2 ++ 14 files changed, 81 insertions(+), 27 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index c827ff3ab8..e7c195a4cb 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -85,6 +85,8 @@ const ConfigInfo GFX_PRECOMPILE_UBER_SHADERS{ {System::GFX, "Settings", "PrecompileUberShaders"}, true}; const ConfigInfo GFX_SHADER_COMPILER_THREADS{ {System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; +const ConfigInfo GFX_SHADER_PRECOMPILER_THREADS{ + {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; const ConfigInfo GFX_FORCE_VERTEX_UBER_SHADERS{ {System::GFX, "Settings", "ForceVertexUberShaders"}, false}; const ConfigInfo GFX_FORCE_PIXEL_UBER_SHADERS{ diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 42f4b52bd0..3ec39f300f 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -63,6 +63,7 @@ extern const ConfigInfo GFX_BACKGROUND_SHADER_COMPILING; extern const ConfigInfo GFX_DISABLE_SPECIALIZED_SHADERS; extern const ConfigInfo GFX_PRECOMPILE_UBER_SHADERS; extern const ConfigInfo GFX_SHADER_COMPILER_THREADS; +extern const ConfigInfo GFX_SHADER_PRECOMPILER_THREADS; extern const ConfigInfo GFX_FORCE_VERTEX_UBER_SHADERS; extern const ConfigInfo GFX_FORCE_PIXEL_UBER_SHADERS; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 76a93aad55..74eb2cd85c 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -46,6 +46,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_BACKGROUND_SHADER_COMPILING.location, Config::GFX_DISABLE_SPECIALIZED_SHADERS.location, Config::GFX_PRECOMPILE_UBER_SHADERS.location, Config::GFX_SHADER_COMPILER_THREADS.location, + Config::GFX_SHADER_PRECOMPILER_THREADS.location, Config::GFX_FORCE_VERTEX_UBER_SHADERS.location, Config::GFX_FORCE_PIXEL_UBER_SHADERS.location, Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location, diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index f173551da3..69dedb4d4b 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -513,7 +513,7 @@ void PixelShaderCache::Init() LoadShaderCache(); if (g_ActiveConfig.CanPrecompileUberShaders()) - PrecompileUberShaders(); + QueueUberShaderCompiles(); } void PixelShaderCache::LoadShaderCache() @@ -538,7 +538,7 @@ void PixelShaderCache::Reload() LoadShaderCache(); if (g_ActiveConfig.CanPrecompileUberShaders()) - PrecompileUberShaders(); + QueueUberShaderCompiles(); } // ONLY to be used during shutdown. @@ -757,7 +757,7 @@ bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid, return (shader != nullptr); } -void PixelShaderCache::PrecompileUberShaders() +void PixelShaderCache::QueueUberShaderCompiles() { UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& uid) { if (UberPixelShaders.find(uid) != UberPixelShaders.end()) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index 7f1bd56b33..fcdd55e67b 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -28,7 +28,7 @@ public: static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len); static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader); static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader); - static void PrecompileUberShaders(); + static void QueueUberShaderCompiles(); static ID3D11Buffer* GetConstantBuffer(); diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 8df1ec718f..a90de7c105 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -174,11 +174,12 @@ void VertexShaderCache::Init() LoadShaderCache(); g_async_compiler = std::make_unique(); - if (g_ActiveConfig.GetShaderCompilerThreads() > 0) - g_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); + g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ? + g_ActiveConfig.GetShaderPrecompilerThreads() : + g_ActiveConfig.GetShaderCompilerThreads()); if (g_ActiveConfig.CanPrecompileUberShaders()) - PrecompileUberShaders(); + QueueUberShaderCompiles(); } void VertexShaderCache::LoadShaderCache() @@ -206,7 +207,7 @@ void VertexShaderCache::Reload() LoadShaderCache(); if (g_ActiveConfig.CanPrecompileUberShaders()) - PrecompileUberShaders(); + QueueUberShaderCompiles(); } void VertexShaderCache::Clear() @@ -426,7 +427,7 @@ void VertexShaderCache::RetreiveAsyncShaders() g_async_compiler->RetrieveWorkItems(); } -void VertexShaderCache::PrecompileUberShaders() +void VertexShaderCache::QueueUberShaderCompiles() { UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& uid) { if (ubervshaders.find(uid) != ubervshaders.end()) @@ -435,13 +436,19 @@ void VertexShaderCache::PrecompileUberShaders() g_async_compiler->QueueWorkItem( g_async_compiler->CreateWorkItem(uid)); }); +} +void VertexShaderCache::WaitForBackgroundCompilesToComplete() +{ g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) { Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(), static_cast(completed), static_cast(total)); }); g_async_compiler->RetrieveWorkItems(); Host_UpdateProgressDialog("", -1, -1); + + // Switch from precompile -> runtime compiler threads. + g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); } VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem( diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h index 04c1a05821..dd3b07afa0 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.h @@ -27,7 +27,8 @@ public: static bool SetShader(D3DVertexFormat* vertex_format); static bool SetUberShader(D3DVertexFormat* vertex_format); static void RetreiveAsyncShaders(); - static void PrecompileUberShaders(); + static void QueueUberShaderCompiles(); + static void WaitForBackgroundCompilesToComplete(); static ID3D11Buffer*& GetConstantBuffer(); diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 9063c735d5..ffa15a601c 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -161,6 +161,7 @@ void VideoBackend::Video_Prepare() VertexShaderCache::Init(); PixelShaderCache::Init(); GeometryShaderCache::Init(); + VertexShaderCache::WaitForBackgroundCompilesToComplete(); D3D::InitUtils(); BBox::Init(); } diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 6cdc240ce6..e2d201e9b7 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -607,17 +607,8 @@ void ProgramShaderCache::Init() // on the main thread the first time they are used, causing stutter. Nouveau has been // reported to crash if draw calls are invoked on the shared context threads. For now, // disable asynchronous compilation on Mesa. - if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION) && - g_ActiveConfig.GetShaderCompilerThreads() > 0) - { + if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION)) s_async_compiler = std::make_unique(); - s_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); - if (!s_async_compiler->HasWorkerThreads()) - { - // No point using the async compiler without workers. - s_async_compiler.reset(); - } - } // Read our shader cache, only if supported and enabled if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache) @@ -630,7 +621,19 @@ void ProgramShaderCache::Init() last_uber_entry = nullptr; if (g_ActiveConfig.CanPrecompileUberShaders()) + { + if (s_async_compiler) + s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); PrecompileUberShaders(); + } + + if (s_async_compiler) + { + // No point using the async compiler without workers. + s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); + if (!s_async_compiler->HasWorkerThreads()) + s_async_compiler.reset(); + } } void ProgramShaderCache::RetrieveAsyncShaders() diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp index 07b71a6202..7d5c3e2379 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp @@ -63,9 +63,9 @@ bool ShaderCache::Initialize() return false; m_async_shader_compiler = std::make_unique(); - if (g_ActiveConfig.GetShaderCompilerThreads() > 0) - m_async_shader_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); - + m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ? + g_ActiveConfig.GetShaderPrecompilerThreads() : + g_ActiveConfig.GetShaderCompilerThreads()); return true; } @@ -1236,6 +1236,9 @@ void ShaderCache::PrecompileUberShaders() }); WaitForBackgroundCompilesToComplete(); + + // Switch to the runtime/background thread config. + m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); } void ShaderCache::WaitForBackgroundCompilesToComplete() diff --git a/Source/Core/VideoCommon/AsyncShaderCompiler.cpp b/Source/Core/VideoCommon/AsyncShaderCompiler.cpp index 15eececc4c..59ef9762f1 100644 --- a/Source/Core/VideoCommon/AsyncShaderCompiler.cpp +++ b/Source/Core/VideoCommon/AsyncShaderCompiler.cpp @@ -106,8 +106,11 @@ void AsyncShaderCompiler::WaitUntilCompletion( } } -void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads) +bool AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads) { + if (num_worker_threads == 0) + return true; + for (u32 i = 0; i < num_worker_threads; i++) { void* thread_param = nullptr; @@ -131,6 +134,17 @@ void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads) m_worker_threads.push_back(std::move(thr)); } + + return HasWorkerThreads(); +} + +bool AsyncShaderCompiler::ResizeWorkerThreads(u32 num_worker_threads) +{ + if (m_worker_threads.size() == num_worker_threads) + return true; + + StopWorkerThreads(); + return StartWorkerThreads(num_worker_threads); } bool AsyncShaderCompiler::HasWorkerThreads() const @@ -140,6 +154,9 @@ bool AsyncShaderCompiler::HasWorkerThreads() const void AsyncShaderCompiler::StopWorkerThreads() { + if (!HasWorkerThreads()) + return; + // Signal worker threads to stop, and wake all of them. { std::lock_guard guard(m_pending_work_lock); @@ -151,6 +168,7 @@ void AsyncShaderCompiler::StopWorkerThreads() for (std::thread& thr : m_worker_threads) thr.join(); m_worker_threads.clear(); + m_exit_flag.Clear(); } bool AsyncShaderCompiler::WorkerThreadInitMainThread(void** param) diff --git a/Source/Core/VideoCommon/AsyncShaderCompiler.h b/Source/Core/VideoCommon/AsyncShaderCompiler.h index 88a3d116f4..fb117dab28 100644 --- a/Source/Core/VideoCommon/AsyncShaderCompiler.h +++ b/Source/Core/VideoCommon/AsyncShaderCompiler.h @@ -52,7 +52,8 @@ public: void WaitUntilCompletion(const std::function& progress_callback); // Needed because of calling virtual methods in shutdown procedure. - void StartWorkerThreads(u32 num_worker_threads); + bool StartWorkerThreads(u32 num_worker_threads); + bool ResizeWorkerThreads(u32 num_worker_threads); bool HasWorkerThreads() const; void StopWorkerThreads(); diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 96ec61d596..2b0b169649 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -98,6 +98,7 @@ void VideoConfig::Refresh() bDisableSpecializedShaders = Config::Get(Config::GFX_DISABLE_SPECIALIZED_SHADERS); bPrecompileUberShaders = Config::Get(Config::GFX_PRECOMPILE_UBER_SHADERS); iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); + iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); bForceVertexUberShaders = Config::Get(Config::GFX_FORCE_VERTEX_UBER_SHADERS); bForcePixelUberShaders = Config::Get(Config::GFX_FORCE_PIXEL_UBER_SHADERS); @@ -196,13 +197,26 @@ bool VideoConfig::IsVSync() return bVSync && !Core::GetIsThrottlerTempDisabled(); } +static u32 GetNumAutoShaderCompilerThreads() +{ + // Automatic number. We use clamp(cpus - 3, 1, 4). + return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); +} + u32 VideoConfig::GetShaderCompilerThreads() const { if (iShaderCompilerThreads >= 0) return static_cast(iShaderCompilerThreads); + else + return GetNumAutoShaderCompilerThreads(); +} - // Automatic number. We use clamp(cpus - 3, 1, 4). - return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); +u32 VideoConfig::GetShaderPrecompilerThreads() const +{ + if (iShaderPrecompilerThreads >= 0) + return static_cast(iShaderPrecompilerThreads); + else + return GetNumAutoShaderCompilerThreads(); } bool VideoConfig::CanUseUberShaders() const diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index a8a4d9358d..fd327f2c89 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -192,6 +192,7 @@ struct VideoConfig final // 0 disables background compilation. // -1 uses an automatic number based on the CPU threads. int iShaderCompilerThreads; + int iShaderPrecompilerThreads; // Temporary toggling of ubershaders, for debugging bool bForceVertexUberShaders; @@ -256,6 +257,7 @@ struct VideoConfig final } bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; } u32 GetShaderCompilerThreads() const; + u32 GetShaderPrecompilerThreads() const; bool CanUseUberShaders() const; bool CanPrecompileUberShaders() const; bool CanBackgroundCompileShaders() const;