From e31cc1f679db82f6baca1bb3d934e2dd17542d55 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 1 Mar 2018 19:21:06 +1000 Subject: [PATCH] ShaderCache: Implement background shader compilation This enables shaders to be compiled while the game is starting, instead of blocking startup. If a shader is needed before it is compiled, emulation will block. --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 + Source/Core/Core/Config/GraphicsSettings.h | 1 + .../Core/ConfigLoaders/IsSettingSaveable.cpp | 4 +- .../Config/Graphics/GeneralWidget.cpp | 10 ++ .../Config/Graphics/GeneralWidget.h | 1 + Source/Core/DolphinWX/VideoConfigDiag.cpp | 10 ++ .../Core/VideoCommon/AsyncShaderCompiler.cpp | 1 - Source/Core/VideoCommon/GXPipelineTypes.h | 75 ++++++++++ Source/Core/VideoCommon/ShaderCache.cpp | 131 ++++++++++-------- Source/Core/VideoCommon/ShaderCache.h | 90 ++---------- Source/Core/VideoCommon/VertexManagerBase.h | 4 +- Source/Core/VideoCommon/VideoCommon.vcxproj | 3 +- .../VideoCommon/VideoCommon.vcxproj.filters | 9 +- Source/Core/VideoCommon/VideoConfig.cpp | 5 + Source/Core/VideoCommon/VideoConfig.h | 1 + 15 files changed, 202 insertions(+), 145 deletions(-) create mode 100644 Source/Core/VideoCommon/GXPipelineTypes.h diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index b06ccdaf41..10a70c6020 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -76,6 +76,8 @@ const ConfigInfo GFX_BACKEND_MULTITHREADING{ const ConfigInfo GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{ {System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100}; const ConfigInfo GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true}; +const ConfigInfo GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{ + {System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false}; const ConfigInfo GFX_UBERSHADER_MODE{{System::GFX, "Settings", "UberShaderMode"}, static_cast(UberShaderMode::Disabled)}; const 
ConfigInfo GFX_SHADER_COMPILER_THREADS{ diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 5f07d1906a..5c1684777b 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -59,6 +59,7 @@ extern const ConfigInfo GFX_ENABLE_VALIDATION_LAYER; extern const ConfigInfo GFX_BACKEND_MULTITHREADING; extern const ConfigInfo GFX_COMMAND_BUFFER_EXECUTE_INTERVAL; extern const ConfigInfo GFX_SHADER_CACHE; +extern const ConfigInfo GFX_WAIT_FOR_SHADERS_BEFORE_STARTING; extern const ConfigInfo GFX_UBERSHADER_MODE; extern const ConfigInfo GFX_SHADER_COMPILER_THREADS; extern const ConfigInfo GFX_SHADER_PRECOMPILER_THREADS; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index abc30b24d3..ef71c085ab 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -46,8 +46,8 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_DISABLE_FOG.location, Config::GFX_BORDERLESS_FULLSCREEN.location, Config::GFX_ENABLE_VALIDATION_LAYER.location, Config::GFX_BACKEND_MULTITHREADING.location, Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL.location, Config::GFX_SHADER_CACHE.location, - Config::GFX_UBERSHADER_MODE.location, Config::GFX_SHADER_COMPILER_THREADS.location, - Config::GFX_SHADER_PRECOMPILER_THREADS.location, + Config::GFX_WAIT_FOR_SHADERS_BEFORE_STARTING.location, Config::GFX_UBERSHADER_MODE.location, + Config::GFX_SHADER_COMPILER_THREADS.location, Config::GFX_SHADER_PRECOMPILER_THREADS.location, Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location, Config::GFX_SW_DUMP_OBJECTS.location, Config::GFX_SW_DUMP_TEV_STAGES.location, diff --git a/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.cpp b/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.cpp index e76fc6a35c..21281488b8 100644 --- 
a/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.cpp +++ b/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.cpp @@ -87,6 +87,8 @@ void GeneralWidget::CreateWidgets() m_keep_window_top = new QCheckBox(tr("Keep Window on Top")); m_hide_cursor = new QCheckBox(tr("Hide Mouse Cursor")); m_render_main_window = new QCheckBox(tr("Render to Main Window")); + m_wait_for_shaders = new GraphicsBool(tr("Immediately Compile Shaders"), + Config::GFX_WAIT_FOR_SHADERS_BEFORE_STARTING); m_options_box->setLayout(m_options_layout); @@ -101,6 +103,7 @@ void GeneralWidget::CreateWidgets() m_options_layout->addWidget(m_hide_cursor, 3, 0); m_options_layout->addWidget(m_render_main_window, 3, 1); + m_options_layout->addWidget(m_wait_for_shaders, 4, 0); main_layout->addWidget(m_video_box); main_layout->addWidget(m_options_box); @@ -265,6 +268,12 @@ void GeneralWidget::AddDescriptions() static const char* TR_SHOW_NETPLAY_MESSAGES_DESCRIPTION = QT_TR_NOOP("When playing on NetPlay, show chat messages, buffer changes and " "desync alerts.\n\nIf unsure, leave this unchecked."); + static const char* TR_WAIT_FOR_SHADERS_DESCRIPTION = QT_TR_NOOP( + "Waits for all shaders to finish compiling before starting a game. Enabling this " + "option may reduce stuttering or hitching for a short time after the game is " + "started, at the cost of a longer delay before the game starts.\n\nFor systems " + "with two or fewer cores, it is recommended to enable this option, as a large " + "shader queue may reduce frame rates. 
Otherwise, if unsure, leave this unchecked."); AddDescription(m_backend_combo, TR_BACKEND_DESCRIPTION); #ifdef _WIN32 @@ -282,6 +291,7 @@ void GeneralWidget::AddDescriptions() AddDescription(m_show_messages, TR_SHOW_FPS_DESCRIPTION); AddDescription(m_keep_window_top, TR_KEEP_WINDOW_ON_TOP_DESCRIPTION); AddDescription(m_show_messages, TR_SHOW_NETPLAY_MESSAGES_DESCRIPTION); + AddDescription(m_wait_for_shaders, TR_WAIT_FOR_SHADERS_DESCRIPTION); } void GeneralWidget::OnBackendChanged(const QString& backend_name) { diff --git a/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.h b/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.h index e611950dec..8b2d22f331 100644 --- a/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.h +++ b/Source/Core/DolphinQt2/Config/Graphics/GeneralWidget.h @@ -52,6 +52,7 @@ private: QCheckBox* m_keep_window_top; QCheckBox* m_hide_cursor; QCheckBox* m_render_main_window; + QCheckBox* m_wait_for_shaders; X11Utils::XRRConfiguration* m_xrr_config; }; diff --git a/Source/Core/DolphinWX/VideoConfigDiag.cpp b/Source/Core/DolphinWX/VideoConfigDiag.cpp index 874508459e..f1a57f8b8c 100644 --- a/Source/Core/DolphinWX/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/VideoConfigDiag.cpp @@ -317,6 +317,12 @@ static wxString ubershader_desc = "stuttering. Balances performance and smoothness.\n\n" "Exclusive: Ubershaders will always be used. Only recommended for high-end " "systems."); +static wxString wait_for_shaders_desc = + wxTRANSLATE("Waits for all shaders to finish compiling before starting a game. Enabling this " + "option may reduce stuttering or hitching for a short time after the game is " + "started, at the cost of a longer delay before the game starts.\n\nFor systems " + "with two or fewer cores, it is recommended to enable this option, as a large " + "shader queue may reduce frame rates. 
Otherwise, if unsure, leave this unchecked."); VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title) : wxDialog(parent, wxID_ANY, wxString::Format(_("Dolphin %s Graphics Configuration"), @@ -442,6 +448,10 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title) wxGetTranslation(backend_multithreading_desc), Config::GFX_BACKEND_MULTITHREADING)); } + + szr_other->Add(CreateCheckBox(page_general, _("Immediately Compile Shaders"), + wxGetTranslation(wait_for_shaders_desc), + Config::GFX_WAIT_FOR_SHADERS_BEFORE_STARTING)); } wxStaticBoxSizer* const group_basic = diff --git a/Source/Core/VideoCommon/AsyncShaderCompiler.cpp b/Source/Core/VideoCommon/AsyncShaderCompiler.cpp index 59ef9762f1..e605828ed5 100644 --- a/Source/Core/VideoCommon/AsyncShaderCompiler.cpp +++ b/Source/Core/VideoCommon/AsyncShaderCompiler.cpp @@ -18,7 +18,6 @@ AsyncShaderCompiler::~AsyncShaderCompiler() // Pending work can be left at shutdown. // The work item classes are expected to clean up after themselves. _assert_(!HasWorkerThreads()); - _assert_(m_completed_work.empty()); } void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item) diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h new file mode 100644 index 0000000000..165e9c67f8 --- /dev/null +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -0,0 +1,75 @@ +// Copyright 2018 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoCommon/GeometryShaderGen.h" +#include "VideoCommon/PixelShaderGen.h" +#include "VideoCommon/RenderState.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" +#include "VideoCommon/VertexShaderGen.h" + +class NativeVertexFormat; + +namespace VideoCommon +{ +struct GXPipelineUid +{ + const NativeVertexFormat* vertex_format; + VertexShaderUid vs_uid; + GeometryShaderUid gs_uid; + PixelShaderUid ps_uid; + RasterizationState rasterization_state; + DepthState depth_state; + BlendingState blending_state; + + // We use memcmp() for comparing pipelines as std::tie generates a large number of instructions, + // and this map lookup can happen every draw call. However, as using memcmp() will also compare + // any padding bytes, we have to ensure these are zeroed out. + GXPipelineUid() { std::memset(this, 0, sizeof(*this)); } + GXPipelineUid(const GXPipelineUid& rhs) { std::memcpy(this, &rhs, sizeof(*this)); } + GXPipelineUid& operator=(const GXPipelineUid& rhs) + { + std::memcpy(this, &rhs, sizeof(*this)); + return *this; + } + bool operator<(const GXPipelineUid& rhs) const + { + return std::memcmp(this, &rhs, sizeof(*this)) < 0; + } + bool operator==(const GXPipelineUid& rhs) const + { + return std::memcmp(this, &rhs, sizeof(*this)) == 0; + } + bool operator!=(const GXPipelineUid& rhs) const { return !operator==(rhs); } +}; +struct GXUberPipelineUid +{ + const NativeVertexFormat* vertex_format; + UberShader::VertexShaderUid vs_uid; + GeometryShaderUid gs_uid; + UberShader::PixelShaderUid ps_uid; + RasterizationState rasterization_state; + DepthState depth_state; + BlendingState blending_state; + + GXUberPipelineUid() { std::memset(this, 0, sizeof(*this)); } + GXUberPipelineUid(const GXUberPipelineUid& rhs) { std::memcpy(this, &rhs, sizeof(*this)); } + GXUberPipelineUid& operator=(const GXUberPipelineUid& rhs) + { + std::memcpy(this, &rhs, sizeof(*this)); + return *this; + } + bool operator<(const 
GXUberPipelineUid& rhs) const + { + return std::memcmp(this, &rhs, sizeof(*this)) < 0; + } + bool operator==(const GXUberPipelineUid& rhs) const + { + return std::memcmp(this, &rhs, sizeof(*this)) == 0; + } + bool operator!=(const GXUberPipelineUid& rhs) const { return !operator==(rhs); } +}; +} // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 43a31dfd6b..512adbd12a 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -43,6 +43,8 @@ bool ShaderCache::Initialize() // Compile all known UIDs. CompileMissingPipelines(); + if (g_ActiveConfig.bWaitForShadersBeforeStarting) + WaitForAsyncCompiler(); // Switch to the runtime shader compiler thread configuration. m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); @@ -61,9 +63,7 @@ void ShaderCache::SetHostConfig(const ShaderHostConfig& host_config, u32 efb_mul void ShaderCache::Reload() { - m_async_shader_compiler->WaitUntilCompletion(); - m_async_shader_compiler->RetrieveWorkItems(); - + WaitForAsyncCompiler(); InvalidateCachedPipelines(); ClearShaderCaches(); @@ -77,6 +77,8 @@ void ShaderCache::Reload() // UIDs are still be in the map. Therefore, when these are rebuilt, the shaders will also // be recompiled. CompileMissingPipelines(); + if (g_ActiveConfig.bWaitForShadersBeforeStarting) + WaitForAsyncCompiler(); m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); } @@ -87,14 +89,14 @@ void ShaderCache::RetrieveAsyncShaders() void ShaderCache::Shutdown() { + // This may leave shaders uncommitted to the cache, but it's better than blocking shutdown + // until everything has finished compiling. 
m_async_shader_compiler->StopWorkerThreads(); - m_async_shader_compiler->RetrieveWorkItems(); - ClearShaderCaches(); ClearPipelineCaches(); } -const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineConfig& uid) +const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineUid& uid) { auto it = m_gx_pipeline_cache.find(uid); if (it != m_gx_pipeline_cache.end() && !it->second.second) @@ -109,8 +111,7 @@ const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineConfig& u return InsertGXPipeline(uid, std::move(pipeline)); } -std::optional -ShaderCache::GetPipelineForUidAsync(const GXPipelineConfig& uid) +std::optional ShaderCache::GetPipelineForUidAsync(const GXPipelineUid& uid) { auto it = m_gx_pipeline_cache.find(uid); if (it != m_gx_pipeline_cache.end()) @@ -159,7 +160,7 @@ ShaderCache::GetPipelineForUidAsync(const GXPipelineConfig& uid) return {}; } -const AbstractPipeline* ShaderCache::GetUberPipelineForUid(const GXUberPipelineConfig& uid) +const AbstractPipeline* ShaderCache::GetUberPipelineForUid(const GXUberPipelineUid& uid) { auto it = m_gx_uber_pipeline_cache.find(uid); if (it != m_gx_uber_pipeline_cache.end() && !it->second.second) @@ -172,12 +173,16 @@ const AbstractPipeline* ShaderCache::GetUberPipelineForUid(const GXUberPipelineC return InsertGXUberPipeline(uid, std::move(pipeline)); } -void ShaderCache::WaitForAsyncCompiler(const std::string& msg) +void ShaderCache::WaitForAsyncCompiler() { - m_async_shader_compiler->WaitUntilCompletion([&msg](size_t completed, size_t total) { - Host_UpdateProgressDialog(msg.c_str(), static_cast(completed), static_cast(total)); - }); - m_async_shader_compiler->RetrieveWorkItems(); + while (m_async_shader_compiler->HasPendingWork()) + { + m_async_shader_compiler->WaitUntilCompletion([](size_t completed, size_t total) { + Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(), + static_cast(completed), static_cast(total)); + }); + 
m_async_shader_compiler->RetrieveWorkItems(); + } Host_UpdateProgressDialog("", -1, -1); } @@ -274,7 +279,7 @@ void ShaderCache::LoadPipelineUIDCache() CacheReader(ShaderCache* shader_cache_) : shader_cache(shader_cache_) {} void Read(const GXPipelineDiskCacheUid& key, const u8* data, u32 data_size) { - GXPipelineConfig config = {}; + GXPipelineUid config = {}; config.vertex_format = VertexLoaderManager::GetOrCreateMatchingFormat(key.vertex_decl); config.vs_uid = key.vs_uid; config.gs_uid = key.gs_uid; @@ -315,8 +320,6 @@ void ShaderCache::CompileMissingPipelines() if (!it.second.second) QueueUberPipelineCompile(it.first); } - - WaitForAsyncCompiler(GetStringT("Compiling shaders...")); } void ShaderCache::InvalidateCachedPipelines() @@ -508,8 +511,7 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( return config; } -std::optional -ShaderCache::GetGXPipelineConfig(const GXPipelineConfig& config) +std::optional ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config) { const AbstractShader* vs; auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid); @@ -545,7 +547,7 @@ ShaderCache::GetGXPipelineConfig(const GXPipelineConfig& config) } std::optional -ShaderCache::GetGXUberPipelineConfig(const GXUberPipelineConfig& config) +ShaderCache::GetGXUberPipelineConfig(const GXUberPipelineUid& config) { const AbstractShader* vs; auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid); @@ -580,7 +582,7 @@ ShaderCache::GetGXUberPipelineConfig(const GXUberPipelineConfig& config) config.depth_state, config.blending_state); } -const AbstractPipeline* ShaderCache::InsertGXPipeline(const GXPipelineConfig& config, +const AbstractPipeline* ShaderCache::InsertGXPipeline(const GXPipelineUid& config, std::unique_ptr pipeline) { auto& entry = m_gx_pipeline_cache[config]; @@ -592,7 +594,7 @@ const AbstractPipeline* ShaderCache::InsertGXPipeline(const GXPipelineConfig& co } const AbstractPipeline* -ShaderCache::InsertGXUberPipeline(const GXUberPipelineConfig& config, 
+ShaderCache::InsertGXUberPipeline(const GXUberPipelineUid& config, std::unique_ptr pipeline) { auto& entry = m_gx_uber_pipeline_cache[config]; @@ -603,7 +605,7 @@ ShaderCache::InsertGXUberPipeline(const GXUberPipelineConfig& config, return entry.first.get(); } -void ShaderCache::AppendGXPipelineUID(const GXPipelineConfig& config) +void ShaderCache::AppendGXPipelineUID(const GXPipelineUid& config) { // Convert to disk format. GXPipelineDiskCacheUid disk_uid = {}; @@ -633,7 +635,7 @@ void ShaderCache::QueueVertexShaderCompile(const VertexShaderUid& uid) return true; } - virtual void Retrieve() override { shader_cache->InsertVertexShader(uid, std::move(shader)); } + void Retrieve() override { shader_cache->InsertVertexShader(uid, std::move(shader)); } private: ShaderCache* shader_cache; std::unique_ptr shader; @@ -661,11 +663,7 @@ void ShaderCache::QueueVertexUberShaderCompile(const UberShader::VertexShaderUid return true; } - virtual void Retrieve() override - { - shader_cache->InsertVertexUberShader(uid, std::move(shader)); - } - + void Retrieve() override { shader_cache->InsertVertexUberShader(uid, std::move(shader)); } private: ShaderCache* shader_cache; std::unique_ptr shader; @@ -693,7 +691,7 @@ void ShaderCache::QueuePixelShaderCompile(const PixelShaderUid& uid) return true; } - virtual void Retrieve() override { shader_cache->InsertPixelShader(uid, std::move(shader)); } + void Retrieve() override { shader_cache->InsertPixelShader(uid, std::move(shader)); } private: ShaderCache* shader_cache; std::unique_ptr shader; @@ -721,11 +719,7 @@ void ShaderCache::QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& return true; } - virtual void Retrieve() override - { - shader_cache->InsertPixelUberShader(uid, std::move(shader)); - } - + void Retrieve() override { shader_cache->InsertPixelUberShader(uid, std::move(shader)); } private: ShaderCache* shader_cache; std::unique_ptr shader; @@ -737,12 +731,12 @@ void ShaderCache::QueuePixelUberShaderCompile(const 
UberShader::PixelShaderUid& m_async_shader_compiler->QueueWorkItem(std::move(wi)); } -void ShaderCache::QueuePipelineCompile(const GXPipelineConfig& uid) +void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid) { class PipelineWorkItem final : public AsyncShaderCompiler::WorkItem { public: - PipelineWorkItem(ShaderCache* shader_cache_, const GXPipelineConfig& uid_, + PipelineWorkItem(ShaderCache* shader_cache_, const GXPipelineUid& uid_, const AbstractPipelineConfig& config_) : shader_cache(shader_cache_), uid(uid_), config(config_) { @@ -754,11 +748,11 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineConfig& uid) return true; } - virtual void Retrieve() override { shader_cache->InsertGXPipeline(uid, std::move(pipeline)); } + void Retrieve() override { shader_cache->InsertGXPipeline(uid, std::move(pipeline)); } private: ShaderCache* shader_cache; std::unique_ptr pipeline; - GXPipelineConfig uid; + GXPipelineUid uid; AbstractPipelineConfig config; }; @@ -775,13 +769,15 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineConfig& uid) m_gx_pipeline_cache[uid].second = true; } -void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineConfig& uid) +void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid) { - class UberPipelineWorkItem final : public AsyncShaderCompiler::WorkItem + // Since the shaders may not be compiled at pipelines request time, we do this in two passes. + // This is necessary because we can't access the caches in the worker thread. 
+ class UberPipelineCompilePass final : public AsyncShaderCompiler::WorkItem { public: - UberPipelineWorkItem(ShaderCache* shader_cache_, const GXUberPipelineConfig& uid_, - const AbstractPipelineConfig& config_) + UberPipelineCompilePass(ShaderCache* shader_cache_, const GXUberPipelineUid& uid_, + const AbstractPipelineConfig& config_) : shader_cache(shader_cache_), uid(uid_), config(config_) { } @@ -792,27 +788,43 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineConfig& uid) return true; } - virtual void Retrieve() override + void Retrieve() override { shader_cache->InsertGXUberPipeline(uid, std::move(pipeline)); } + private: + ShaderCache* shader_cache; + std::unique_ptr pipeline; + GXUberPipelineUid uid; + AbstractPipelineConfig config; + }; + class UberPipelinePreparePass final : public AsyncShaderCompiler::WorkItem + { + public: + UberPipelinePreparePass(ShaderCache* shader_cache_, const GXUberPipelineUid& uid_) + : shader_cache(shader_cache_), uid(uid_) { - shader_cache->InsertGXUberPipeline(uid, std::move(pipeline)); + } + + bool Compile() override { return true; } + void Retrieve() override + { + auto config = shader_cache->GetGXUberPipelineConfig(uid); + if (!config) + { + // One or more stages failed to compile. + shader_cache->InsertGXUberPipeline(uid, nullptr); + return; + } + + auto wi = shader_cache->m_async_shader_compiler->CreateWorkItem( + shader_cache, uid, *config); + shader_cache->m_async_shader_compiler->QueueWorkItem(std::move(wi)); } private: ShaderCache* shader_cache; - std::unique_ptr pipeline; - GXUberPipelineConfig uid; - AbstractPipelineConfig config; + GXUberPipelineUid uid; }; - auto config = GetGXUberPipelineConfig(uid); - if (!config) - { - // One or more stages failed to compile. 
- InsertGXUberPipeline(uid, nullptr); - return; - } - - auto wi = m_async_shader_compiler->CreateWorkItem(this, uid, *config); + auto wi = m_async_shader_compiler->CreateWorkItem(this, uid); m_async_shader_compiler->QueueWorkItem(std::move(wi)); m_gx_uber_pipeline_cache[uid].second = true; } @@ -841,9 +853,6 @@ void ShaderCache::PrecompileUberShaders() QueuePixelUberShaderCompile(puid); }); - // Wait for shaders to finish compiling. - WaitForAsyncCompiler(GetStringT("Compiling uber shaders...")); - // Create a dummy vertex format with no attributes. // All attributes will be enabled in GetUberVertexFormat. PortableVertexDeclaration dummy_vertex_decl = {}; @@ -856,7 +865,7 @@ void ShaderCache::PrecompileUberShaders() auto QueueDummyPipeline = [&](const UberShader::VertexShaderUid& vs_uid, const GeometryShaderUid& gs_uid, const UberShader::PixelShaderUid& ps_uid) { - GXUberPipelineConfig config; + GXUberPipelineUid config; config.vertex_format = dummy_vertex_format; config.vs_uid = vs_uid; config.gs_uid = gs_uid; diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h index 6516edd8d8..8681df19be 100644 --- a/Source/Core/VideoCommon/ShaderCache.h +++ b/Source/Core/VideoCommon/ShaderCache.h @@ -19,10 +19,10 @@ #include "VideoCommon/AbstractPipeline.h" #include "VideoCommon/AbstractShader.h" -#include "VideoCommon/NativeVertexFormat.h" - #include "VideoCommon/AsyncShaderCompiler.h" +#include "VideoCommon/GXPipelineTypes.h" #include "VideoCommon/GeometryShaderGen.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/PixelShaderGen.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/UberShaderPixel.h" @@ -33,64 +33,6 @@ class NativeVertexFormat; namespace VideoCommon { -struct GXPipelineConfig -{ - const NativeVertexFormat* vertex_format; - VertexShaderUid vs_uid; - GeometryShaderUid gs_uid; - PixelShaderUid ps_uid; - RasterizationState rasterization_state; - DepthState depth_state; - BlendingState 
blending_state; - - // We use memcmp() for comparing pipelines as std::tie generates a large number of instructions, - // and this map lookup can happen every draw call. However, as using memcmp() will also compare - // any padding bytes, we have to ensure these are zeroed out. - GXPipelineConfig() { std::memset(this, 0, sizeof(*this)); } - GXPipelineConfig(const GXPipelineConfig& rhs) { std::memcpy(this, &rhs, sizeof(*this)); } - GXPipelineConfig& operator=(const GXPipelineConfig& rhs) - { - std::memcpy(this, &rhs, sizeof(*this)); - return *this; - } - bool operator<(const GXPipelineConfig& rhs) const - { - return std::memcmp(this, &rhs, sizeof(*this)) < 0; - } - bool operator==(const GXPipelineConfig& rhs) const - { - return std::memcmp(this, &rhs, sizeof(*this)) == 0; - } - bool operator!=(const GXPipelineConfig& rhs) const { return !operator==(rhs); } -}; -struct GXUberPipelineConfig -{ - const NativeVertexFormat* vertex_format; - UberShader::VertexShaderUid vs_uid; - GeometryShaderUid gs_uid; - UberShader::PixelShaderUid ps_uid; - RasterizationState rasterization_state; - DepthState depth_state; - BlendingState blending_state; - - GXUberPipelineConfig() { std::memset(this, 0, sizeof(*this)); } - GXUberPipelineConfig(const GXUberPipelineConfig& rhs) { std::memcpy(this, &rhs, sizeof(*this)); } - GXUberPipelineConfig& operator=(const GXUberPipelineConfig& rhs) - { - std::memcpy(this, &rhs, sizeof(*this)); - return *this; - } - bool operator<(const GXUberPipelineConfig& rhs) const - { - return std::memcmp(this, &rhs, sizeof(*this)) < 0; - } - bool operator==(const GXUberPipelineConfig& rhs) const - { - return std::memcmp(this, &rhs, sizeof(*this)) == 0; - } - bool operator!=(const GXUberPipelineConfig& rhs) const { return !operator==(rhs); } -}; - class ShaderCache final { public: @@ -114,15 +56,15 @@ public: std::string GetUtilityShaderHeader() const; // Accesses ShaderGen shader caches - const AbstractPipeline* GetPipelineForUid(const GXPipelineConfig& uid); - 
const AbstractPipeline* GetUberPipelineForUid(const GXUberPipelineConfig& uid); + const AbstractPipeline* GetPipelineForUid(const GXPipelineUid& uid); + const AbstractPipeline* GetUberPipelineForUid(const GXUberPipelineUid& uid); // Accesses ShaderGen shader caches asynchronously. // The optional will be empty if this pipeline is now background compiling. - std::optional GetPipelineForUidAsync(const GXPipelineConfig& uid); + std::optional GetPipelineForUidAsync(const GXPipelineUid& uid); private: - void WaitForAsyncCompiler(const std::string& msg); + void WaitForAsyncCompiler(); void LoadShaderCaches(); void ClearShaderCaches(); void LoadPipelineUIDCache(); @@ -155,21 +97,21 @@ private: const AbstractShader* geometry_shader, const AbstractShader* pixel_shader, const RasterizationState& rasterization_state, const DepthState& depth_state, const BlendingState& blending_state); - std::optional GetGXPipelineConfig(const GXPipelineConfig& uid); - std::optional GetGXUberPipelineConfig(const GXUberPipelineConfig& uid); - const AbstractPipeline* InsertGXPipeline(const GXPipelineConfig& config, + std::optional GetGXPipelineConfig(const GXPipelineUid& uid); + std::optional GetGXUberPipelineConfig(const GXUberPipelineUid& uid); + const AbstractPipeline* InsertGXPipeline(const GXPipelineUid& config, std::unique_ptr pipeline); - const AbstractPipeline* InsertGXUberPipeline(const GXUberPipelineConfig& config, + const AbstractPipeline* InsertGXUberPipeline(const GXUberPipelineUid& config, std::unique_ptr pipeline); - void AppendGXPipelineUID(const GXPipelineConfig& config); + void AppendGXPipelineUID(const GXPipelineUid& config); // ASync Compiler Methods void QueueVertexShaderCompile(const VertexShaderUid& uid); void QueueVertexUberShaderCompile(const UberShader::VertexShaderUid& uid); void QueuePixelShaderCompile(const PixelShaderUid& uid); void QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& uid); - void QueuePipelineCompile(const GXPipelineConfig& uid); - void 
QueueUberPipelineCompile(const GXUberPipelineConfig& uid); + void QueuePipelineCompile(const GXPipelineUid& uid); + void QueueUberPipelineCompile(const GXUberPipelineUid& uid); // Configuration bits. APIType m_api_type = APIType::Nothing; @@ -196,10 +138,8 @@ private: ShaderModuleCache m_uber_ps_cache; // GX Pipeline Caches - .first - pipeline, .second - pending - // TODO: Use unordered_map for speed. - std::map, bool>> - m_gx_pipeline_cache; - std::map, bool>> + std::map, bool>> m_gx_pipeline_cache; + std::map, bool>> m_gx_uber_pipeline_cache; // Disk cache of pipeline UIDs diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index eac95f614f..4d07b08411 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -83,8 +83,8 @@ protected: Slope m_zslope = {}; void CalculateZSlope(NativeVertexFormat* format); - VideoCommon::GXPipelineConfig m_current_pipeline_config; - VideoCommon::GXUberPipelineConfig m_current_uber_pipeline_config; + VideoCommon::GXPipelineUid m_current_pipeline_config; + VideoCommon::GXUberPipelineUid m_current_uber_pipeline_config; const AbstractPipeline* m_current_pipeline_object = nullptr; PrimitiveType m_current_primitive_type = PrimitiveType::Points; bool m_pipeline_config_changed = true; diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 4cc6611ee8..b84bfb6d66 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -120,6 +120,7 @@ + @@ -190,4 +191,4 @@ - + \ No newline at end of file diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index ca0fe0029b..f356cc9ef4 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -198,7 +198,7 @@ Base - Shader Managers + Shader Generators @@ -381,11 +381,14 
@@ Base + + Shader Generators + - Shader Managers + Shader Generators - + \ No newline at end of file diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index c14a00757a..271223ba3d 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -102,6 +102,7 @@ void VideoConfig::Refresh() bBackendMultithreading = Config::Get(Config::GFX_BACKEND_MULTITHREADING); iCommandBufferExecuteInterval = Config::Get(Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL); bShaderCache = Config::Get(Config::GFX_SHADER_CACHE); + bWaitForShadersBeforeStarting = Config::Get(Config::GFX_WAIT_FOR_SHADERS_BEFORE_STARTING); iUberShaderMode = static_cast(Config::Get(Config::GFX_UBERSHADER_MODE)); iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); @@ -196,6 +197,10 @@ u32 VideoConfig::GetShaderCompilerThreads() const u32 VideoConfig::GetShaderPrecompilerThreads() const { + // When not waiting for shaders (background compilation), always keep the same thread count. + if (!bWaitForShadersBeforeStarting) + return GetShaderCompilerThreads(); + if (!backend_info.bSupportsBackgroundCompiling) return 0; diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 2b69919528..c8bded371b 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -169,6 +169,7 @@ struct VideoConfig final int iCommandBufferExecuteInterval; // Shader compilation settings. + bool bWaitForShadersBeforeStarting; UberShaderMode iUberShaderMode; // Number of shader compiler threads.