ShaderCache: Implement compile priority

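Currently, when the "immediately compile shaders" option is not enabled, hybrid ubershaders mode places the ubershaders ahead of any specialized shaders in the compile queue. This means that Dolphin could potentially use the ubershaders for a longer time than it would have if we blocked startup until all shaders were compiled, leading to a drop in performance. This commit gives every queued work item a priority, so that on-demand specialized pipelines are compiled first, pending ubershader pipelines second, and the remaining shader cache pipelines last.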
2018-03-16 22:48:56 +10:00 · 2018-03-16 22:48:56 +10:00 · 93865b327f
parent d3dd85dbd7
commit 93865b327f
4 changed files with 60 additions and 41 deletions
--- a/Source/Core/VideoCommon/AsyncShaderCompiler.cpp
+++ b/Source/Core/VideoCommon/AsyncShaderCompiler.cpp
@ -20,7 +20,7 @@ AsyncShaderCompiler::~AsyncShaderCompiler()
  _assert_(!HasWorkerThreads());
 }

-void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item)
+void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item, u32 priority)
 {
  // If no worker threads are available, compile synchronously.
  if (!HasWorkerThreads())
@ -31,7 +31,7 @@ void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item)
  else
  {
    std::lock_guard<std::mutex> guard(m_pending_work_lock);
-    m_pending_work.push_back(std::move(item));
+    m_pending_work.emplace(priority, std::move(item));
    m_worker_thread_wake.notify_one();
  }
 }
@ -219,8 +219,9 @@ void AsyncShaderCompiler::WorkerThreadRun()
    while (!m_pending_work.empty() && !m_exit_flag.IsSet())
    {
      m_busy_workers++;
-      WorkItemPtr item(std::move(m_pending_work.front()));
-      m_pending_work.pop_front();
+      auto iter = m_pending_work.begin();
+      WorkItemPtr item(std::move(iter->second));
+      m_pending_work.erase(iter);
      pending_lock.unlock();

      if (item->Compile())
--- a/Source/Core/VideoCommon/AsyncShaderCompiler.h
+++ b/Source/Core/VideoCommon/AsyncShaderCompiler.h
@ -8,6 +8,7 @@
 #include <condition_variable>
 #include <deque>
 #include <functional>
+#include <map>
 #include <memory>
 #include <mutex>
 #include <thread>
@ -42,7 +43,9 @@ public:
    return std::make_unique<T>(std::forward<Params>(params)...);
  }

-  void QueueWorkItem(WorkItemPtr item);
+  // Queues a new work item to the compiler threads. The lower the priority value, the
+  // sooner this work item will be compiled, relative to the other work items.
+  void QueueWorkItem(WorkItemPtr item, u32 priority);
  void RetrieveWorkItems();
  bool HasPendingWork();
  bool HasCompletedWork();
@ -74,7 +77,9 @@ private:
  std::vector<std::thread> m_worker_threads;
  std::atomic_bool m_worker_thread_start_result{false};

-  std::deque<WorkItemPtr> m_pending_work;
+  // A multimap is used to store the work items. We can't use a priority_queue here, because
+  // there's no way to obtain a non-const reference, which we need for the unique_ptr.
+  std::multimap<u32, WorkItemPtr> m_pending_work;
  std::mutex m_pending_work_lock;
  std::condition_variable m_worker_thread_wake;
  std::atomic_size_t m_busy_workers{0};
--- a/Source/Core/VideoCommon/ShaderCache.cpp
+++ b/Source/Core/VideoCommon/ShaderCache.cpp
@ -129,7 +129,7 @@ std::optional<const AbstractPipeline*> ShaderCache::GetPipelineForUidAsync(const
  }

  AppendGXPipelineUID(uid);
-  QueuePipelineCompile(uid);
+  QueuePipelineCompile(uid, COMPILE_PRIORITY_ONDEMAND_PIPELINE);
  return {};
 }

@ -249,12 +249,12 @@ void ShaderCache::CompileMissingPipelines()
  for (auto& it : m_gx_pipeline_cache)
  {
    if (!it.second.second)
-      QueuePipelineCompile(it.first);
+      QueuePipelineCompile(it.first, COMPILE_PRIORITY_SHADERCACHE_PIPELINE);
  }
  for (auto& it : m_gx_uber_pipeline_cache)
  {
    if (!it.second.second)
-      QueueUberPipelineCompile(it.first);
+      QueueUberPipelineCompile(it.first, COMPILE_PRIORITY_UBERSHADER_PIPELINE);
  }
 }

@ -655,7 +655,7 @@ void ShaderCache::AppendGXPipelineUID(const GXPipelineUid& config)
  }
 }

-void ShaderCache::QueueVertexShaderCompile(const VertexShaderUid& uid)
+void ShaderCache::QueueVertexShaderCompile(const VertexShaderUid& uid, u32 priority)
 {
  class VertexShaderWorkItem final : public AsyncShaderCompiler::WorkItem
  {
@ -680,10 +680,10 @@ void ShaderCache::QueueVertexShaderCompile(const VertexShaderUid& uid)

  m_vs_cache.shader_map[uid].pending = true;
  auto wi = m_async_shader_compiler->CreateWorkItem<VertexShaderWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
 }

-void ShaderCache::QueueVertexUberShaderCompile(const UberShader::VertexShaderUid& uid)
+void ShaderCache::QueueVertexUberShaderCompile(const UberShader::VertexShaderUid& uid, u32 priority)
 {
  class VertexUberShaderWorkItem final : public AsyncShaderCompiler::WorkItem
  {
@ -708,10 +708,10 @@ void ShaderCache::QueueVertexUberShaderCompile(const UberShader::VertexShaderUid

  m_uber_vs_cache.shader_map[uid].pending = true;
  auto wi = m_async_shader_compiler->CreateWorkItem<VertexUberShaderWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
 }

-void ShaderCache::QueuePixelShaderCompile(const PixelShaderUid& uid)
+void ShaderCache::QueuePixelShaderCompile(const PixelShaderUid& uid, u32 priority)
 {
  class PixelShaderWorkItem final : public AsyncShaderCompiler::WorkItem
  {
@ -736,10 +736,10 @@ void ShaderCache::QueuePixelShaderCompile(const PixelShaderUid& uid)

  m_ps_cache.shader_map[uid].pending = true;
  auto wi = m_async_shader_compiler->CreateWorkItem<PixelShaderWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
 }

-void ShaderCache::QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& uid)
+void ShaderCache::QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& uid, u32 priority)
 {
  class PixelUberShaderWorkItem final : public AsyncShaderCompiler::WorkItem
  {
@ -764,16 +764,16 @@ void ShaderCache::QueuePixelUberShaderCompile(const UberShader::PixelShaderUid&

  m_uber_ps_cache.shader_map[uid].pending = true;
  auto wi = m_async_shader_compiler->CreateWorkItem<PixelUberShaderWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
 }

-void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid)
+void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid, u32 priority)
 {
  class PipelineWorkItem final : public AsyncShaderCompiler::WorkItem
  {
  public:
-    PipelineWorkItem(ShaderCache* shader_cache_, const GXPipelineUid& uid_)
-        : shader_cache(shader_cache_), uid(uid_)
+    PipelineWorkItem(ShaderCache* shader_cache_, const GXPipelineUid& uid_, u32 priority_)
+        : shader_cache(shader_cache_), uid(uid_), priority(priority_)
    {
      // Check if all the stages required for this pipeline have been compiled.
      // If not, this work item becomes a no-op, and re-queues the pipeline for the next frame.
@ -788,12 +788,12 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid)
      auto vs_it = shader_cache->m_vs_cache.shader_map.find(uid.vs_uid);
      stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending;
      if (vs_it == shader_cache->m_vs_cache.shader_map.end())
-        shader_cache->QueueVertexShaderCompile(uid.vs_uid);
+        shader_cache->QueueVertexShaderCompile(uid.vs_uid, priority);

      auto ps_it = shader_cache->m_ps_cache.shader_map.find(uid.ps_uid);
      stages_ready &= ps_it != shader_cache->m_ps_cache.shader_map.end() && !ps_it->second.pending;
      if (ps_it == shader_cache->m_ps_cache.shader_map.end())
-        shader_cache->QueuePixelShaderCompile(uid.ps_uid);
+        shader_cache->QueuePixelShaderCompile(uid.ps_uid, priority);

      return stages_ready;
    }
@ -815,8 +815,8 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid)
      {
        // Re-queue for next frame.
        auto wi = shader_cache->m_async_shader_compiler->CreateWorkItem<PipelineWorkItem>(
-            shader_cache, uid);
-        shader_cache->m_async_shader_compiler->QueueWorkItem(std::move(wi));
+            shader_cache, uid, priority);
+        shader_cache->m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
      }
    }

@ -824,22 +824,23 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid)
    ShaderCache* shader_cache;
    std::unique_ptr<AbstractPipeline> pipeline;
    GXPipelineUid uid;
+    u32 priority;
    std::optional<AbstractPipelineConfig> config;
    bool stages_ready;
  };

-  auto wi = m_async_shader_compiler->CreateWorkItem<PipelineWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  auto wi = m_async_shader_compiler->CreateWorkItem<PipelineWorkItem>(this, uid, priority);
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
  m_gx_pipeline_cache[uid].second = true;
 }

-void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid)
+void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 priority)
 {
  class UberPipelineWorkItem final : public AsyncShaderCompiler::WorkItem
  {
  public:
-    UberPipelineWorkItem(ShaderCache* shader_cache_, const GXUberPipelineUid& uid_)
-        : shader_cache(shader_cache_), uid(uid_)
+    UberPipelineWorkItem(ShaderCache* shader_cache_, const GXUberPipelineUid& uid_, u32 priority_)
+        : shader_cache(shader_cache_), uid(uid_), priority(priority_)
    {
      // Check if all the stages required for this UberPipeline have been compiled.
      // If not, this work item becomes a no-op, and re-queues the UberPipeline for the next frame.
@ -855,13 +856,13 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid)
      stages_ready &=
          vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending;
      if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end())
-        shader_cache->QueueVertexUberShaderCompile(uid.vs_uid);
+        shader_cache->QueueVertexUberShaderCompile(uid.vs_uid, priority);

      auto ps_it = shader_cache->m_uber_ps_cache.shader_map.find(uid.ps_uid);
      stages_ready &=
          ps_it != shader_cache->m_uber_ps_cache.shader_map.end() && !ps_it->second.pending;
      if (ps_it == shader_cache->m_uber_ps_cache.shader_map.end())
-        shader_cache->QueuePixelUberShaderCompile(uid.ps_uid);
+        shader_cache->QueuePixelUberShaderCompile(uid.ps_uid, priority);

      return stages_ready;
    }
@ -883,8 +884,8 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid)
      {
        // Re-queue for next frame.
        auto wi = shader_cache->m_async_shader_compiler->CreateWorkItem<UberPipelineWorkItem>(
-            shader_cache, uid);
-        shader_cache->m_async_shader_compiler->QueueWorkItem(std::move(wi));
+            shader_cache, uid, priority);
+        shader_cache->m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
      }
    }

@ -892,12 +893,13 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid)
    ShaderCache* shader_cache;
    std::unique_ptr<AbstractPipeline> UberPipeline;
    GXUberPipelineUid uid;
+    u32 priority;
    std::optional<AbstractPipelineConfig> config;
    bool stages_ready;
  };

-  auto wi = m_async_shader_compiler->CreateWorkItem<UberPipelineWorkItem>(this, uid);
-  m_async_shader_compiler->QueueWorkItem(std::move(wi));
+  auto wi = m_async_shader_compiler->CreateWorkItem<UberPipelineWorkItem>(this, uid, priority);
+  m_async_shader_compiler->QueueWorkItem(std::move(wi), priority);
  m_gx_uber_pipeline_cache[uid].second = true;
 }

--- a/Source/Core/VideoCommon/ShaderCache.h
+++ b/Source/Core/VideoCommon/ShaderCache.h
@ -108,12 +108,23 @@ private:
  void AppendGXPipelineUID(const GXPipelineUid& config);

  // ASync Compiler Methods
-  void QueueVertexShaderCompile(const VertexShaderUid& uid);
-  void QueueVertexUberShaderCompile(const UberShader::VertexShaderUid& uid);
-  void QueuePixelShaderCompile(const PixelShaderUid& uid);
-  void QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& uid);
-  void QueuePipelineCompile(const GXPipelineUid& uid);
-  void QueueUberPipelineCompile(const GXUberPipelineUid& uid);
+  void QueueVertexShaderCompile(const VertexShaderUid& uid, u32 priority);
+  void QueueVertexUberShaderCompile(const UberShader::VertexShaderUid& uid, u32 priority);
+  void QueuePixelShaderCompile(const PixelShaderUid& uid, u32 priority);
+  void QueuePixelUberShaderCompile(const UberShader::PixelShaderUid& uid, u32 priority);
+  void QueuePipelineCompile(const GXPipelineUid& uid, u32 priority);
+  void QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 priority);
+
+  // Priorities for compiling. The lower the value, the sooner the pipeline is compiled.
+  // The shader cache is compiled last, as it is the least likely to be required. On demand
+  // shaders are always compiled before pending ubershaders, as we want to use the ubershader
+  // for as few frames as possible, otherwise we risk framerate drops.
+  enum : u32
+  {
+    COMPILE_PRIORITY_ONDEMAND_PIPELINE = 100,
+    COMPILE_PRIORITY_UBERSHADER_PIPELINE = 200,
+    COMPILE_PRIORITY_SHADERCACHE_PIPELINE = 300
+  };

  // Configuration bits.
  APIType m_api_type = APIType::Nothing;