From 6ac97405df021d5d2bd9a529253bd5c5a418c1a9 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Tue, 28 Jul 2020 00:08:02 -0400
Subject: [PATCH 1/7] Vk Async pipeline compilation

---
 src/video_core/renderer_vulkan/vk_device.cpp  |  2 +
 src/video_core/renderer_vulkan/vk_device.h    |  5 ++
 .../renderer_vulkan/vk_fence_manager.cpp      |  2 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp  |  5 +-
 .../renderer_vulkan/vk_graphics_pipeline.h    |  6 ++
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 24 ++++++--
 .../renderer_vulkan/vk_pipeline_cache.h       | 27 ++++++++-
 .../renderer_vulkan/vk_rasterizer.cpp         | 27 ++++++++-
 .../renderer_vulkan/vk_rasterizer.h           | 10 ++++
 src/video_core/renderer_vulkan/wrapper.cpp    |  2 +-
 src/video_core/renderer_vulkan/wrapper.h      |  2 +-
 src/video_core/shader/async_shaders.cpp       | 59 +++++++++++++++++--
 src/video_core/shader/async_shaders.h         | 31 +++++++++-
 13 files changed, 182 insertions(+), 20 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 0c03e4d834..ebcfaa0e3e 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -382,6 +382,8 @@ bool VKDevice::Create() {
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
+
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
     return true;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 529744f2d1..30cd3e1897 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,10 @@ public:
         return reported_extensions;
     }
 
+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders;
+    }
+
     /// Checks if the physical device is suitable.
     static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
 
@@ -251,6 +255,7 @@ private:
     bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
+    bool use_asynchronous_shaders{};
 
     // Telemetry parameters
     std::string vendor_name;                      ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487a..d7f65d435e 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,7 +29,7 @@ void InnerFence::Queue() {
     }
     ASSERT(!event);
 
-    event = device.GetLogical().CreateEvent();
+    event = device.GetLogical().CreateNewEvent();
     ticks = scheduler.Ticks();
 
     scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index aaf930b901..7d51b98362 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -84,9 +84,8 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
-                                                                             key.renderpass_params,
-                                                                             program)} {}
+      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)},
+      pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c7..39c73a139f 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -54,6 +54,10 @@ public:
         return renderpass;
     }
 
+    const GraphicsPipelineCacheKey& GetCacheKey() {
+        return m_key;
+    }
+
 private:
     vk::DescriptorSetLayout CreateDescriptorSetLayout(
         vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -82,6 +86,8 @@ private:
 
     VkRenderPass renderpass;
     vk::Pipeline pipeline;
+
+    const GraphicsPipelineCacheKey& m_key;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 418c62bc43..45d4dcb8c5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -205,7 +205,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
+    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
@@ -213,11 +214,27 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineC
     }
     last_graphics_key = key;
 
+    if (device.UseAsynchronousShaders()) {
+        auto work = async_shaders.GetCompletedWork();
+        for (std::size_t i = 0; i < work.size(); ++i) {
+            auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey());
+            entry = std::move(work[i].pipeline);
+        }
+        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+        if (is_cache_miss) {
+            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+            const auto [program, bindings] = DecompileShaders(key.fixed_state);
+            async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
+                                            key.padding, key.shaders, key.fixed_state);
+        }
+        return *(last_graphics_pipeline = graphics_cache.at(key).get());
+    }
+
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
     auto& entry = pair->second;
     if (is_cache_miss) {
         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-        const auto [program, bindings] = DecompileShaders(key);
+        const auto [program, bindings] = DecompileShaders(key.fixed_state);
         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
@@ -312,8 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 }
 
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
-    const auto& fixed_state = key.fixed_state;
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
     auto& memory_manager = system.GPU().MemoryManager();
     const auto& gpu = system.GPU().Maxwell3D();
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0a3fe65fbe..c70da6da40 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
@@ -152,16 +153,37 @@ public:
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+                                            VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
+    const VKDevice& GetDevice() {
+        return device;
+    }
+
+    VKScheduler& GetScheduler() {
+        return scheduler;
+    }
+
+    VKDescriptorPool& GetDescriptorPool() {
+        return descriptor_pool;
+    }
+
+    VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
+        return update_descriptor_queue;
+    }
+
+    VKRenderPassCache& GetRenderpassCache() {
+        return renderpass_cache;
+    }
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
 private:
     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
-        const GraphicsPipelineCacheKey& key);
+        const FixedPipelineState& fixed_state);
 
     Core::System& system;
     const VKDevice& device;
@@ -177,6 +199,7 @@ private:
 
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+    std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;
 
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
         graphics_cache;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7500e82444..6310e898ca 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -400,8 +400,25 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
       sampler_cache(device),
       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
-      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+      query_cache(system, *this, device, scheduler),
+      wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
+    if (device.UseAsynchronousShaders()) {
+        // Max worker threads we should allow
+        constexpr auto MAX_THREADS = 2u;
+        // Amount of threads we should reserve for other parts of yuzu
+        constexpr auto RESERVED_THREADS = 6u;
+        // Get the amount of threads we can use(this can return zero)
+        const auto cpu_thread_count =
+            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
+        // Deduce how many "extra" threads we have to use.
+        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Always allow at least 1 thread regardless of our settings
+        const auto max_worker_count = std::max(1u, max_threads_unused);
+        // Don't use more than MAX_THREADS
+        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
+        async_shaders.AllocateWorkers(worker_count);
+    }
 }
 
 RasterizerVulkan::~RasterizerVulkan() = default;
@@ -439,7 +456,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
+    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+        // Async graphics pipeline was not ready.
+        system.GPU().TickWork();
+        return;
+    }
+
     scheduler.BindGraphicsPipeline(pipeline.GetHandle());
 
     const auto renderpass = pipeline.GetRenderPass();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 923178b0bb..27604b9a3c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -32,6 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 
 namespace Core {
 class System;
@@ -136,6 +137,14 @@ public:
                            u32 pixel_stride) override;
     void SetupDirtyFlags() override;
 
+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders;
+    }
+
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders;
+    }
+
     /// Maximum supported size that a constbuffer can have in bytes.
     static constexpr std::size_t MaxConstbufferSize = 0x10000;
     static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@@ -278,6 +287,7 @@ private:
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     VKStagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 14cac38ea7..c43d60adf7 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
     return ShaderModule(object, handle, *dld);
 }
 
-Event Device::CreateEvent() const {
+Event Device::CreateNewEvent() const {
     static constexpr VkEventCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
         .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 31885ef42a..b9d3fedc10 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -721,7 +721,7 @@ public:
 
     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 
-    Event CreateEvent() const;
+    Event CreateNewEvent() const;
 
     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index b7f66d7eef..335a0d05b3 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -113,15 +113,38 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VAddr cpu_addr) {
     WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
                                                     : AsyncShaders::Backend::OpenGL,
-                        device,
+                        &device,
                         shader_type,
                         uid,
                         std::move(code),
                         std::move(code_b),
                         main_offset,
                         compiler_settings,
-                        registry,
+                        &registry,
                         cpu_addr};
+
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push_back(std::move(params));
+    cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(
+    Vulkan::VKPipelineCache* pp_cache, std::vector<VkDescriptorSetLayoutBinding> bindings,
+    Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, u32 padding,
+    std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
+    Vulkan::FixedPipelineState fixed_state) {
+
+    WorkerParams params{
+        .backend = AsyncShaders::Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .bindings = bindings,
+        .program = program,
+        .renderpass_params = renderpass_params,
+        .padding = padding,
+        .shaders = shaders,
+        .fixed_state = fixed_state,
+    };
+
     std::unique_lock lock(queue_mutex);
     pending_queue.push_back(std::move(params));
     cv.notify_one();
@@ -140,6 +163,7 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (!HasWorkQueued()) {
             continue;
         }
+
         // Another thread beat us, just unlock and wait for the next load
         if (pending_queue.empty()) {
             continue;
@@ -152,10 +176,11 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
 
         if (work.backend == AsyncShaders::Backend::OpenGL ||
             work.backend == AsyncShaders::Backend::GLASM) {
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+            VideoCommon::Shader::Registry registry = *work.registry;
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
             Result result{};
             result.backend = work.backend;
             result.cpu_address = work.cpu_address;
@@ -174,6 +199,32 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
+
+        } else if (work.backend == AsyncShaders::Backend::Vulkan) {
+            Vulkan::GraphicsPipelineCacheKey params_key{
+                work.renderpass_params,
+                work.padding,
+                work.shaders,
+                work.fixed_state,
+            };
+            {
+                std::unique_lock complete_lock(completed_mutex);
+
+                // Duplicate creation of pipelines leads to instability and crashing, caused by a
+                // race condition but band-aid solution is locking the making of the pipeline
+                // results in only one pipeline created at a time.
+                Result result{
+                    .backend = work.backend,
+                    .pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                        work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
+                        work.pp_cache->GetDescriptorPool(),
+                        work.pp_cache->GetUpdateDescriptorQueue(),
+                        work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
+                        work.program),
+                };
+
+                finished_work.push_back(std::move(result));
+            }
         }
     }
 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 2f5ee94adf..702026ce24 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -14,6 +14,10 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 
 namespace Core::Frontend {
 class EmuWindow;
@@ -24,6 +28,10 @@ namespace Tegra {
 class GPU;
 }
 
+namespace Vulkan {
+class VKPipelineCache;
+}
+
 namespace VideoCommon::Shader {
 
 class AsyncShaders {
@@ -31,6 +39,7 @@ public:
     enum class Backend {
         OpenGL,
         GLASM,
+        Vulkan,
     };
 
     struct ResultPrograms {
@@ -46,6 +55,7 @@ public:
         std::vector<u64> code;
         std::vector<u64> code_b;
         Tegra::Engines::ShaderType shader_type;
+        std::unique_ptr<Vulkan::VKGraphicsPipeline> pipeline;
     };
 
     explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
@@ -76,6 +86,13 @@ public:
                            VideoCommon::Shader::CompilerSettings compiler_settings,
                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params,
+                           u32 padding,
+                           std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
+                           Vulkan::FixedPipelineState fixed_state);
+
 private:
     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
 
@@ -84,15 +101,25 @@ private:
 
     struct WorkerParams {
         AsyncShaders::Backend backend;
-        OpenGL::Device device;
+        // For OGL
+        const OpenGL::Device* device;
         Tegra::Engines::ShaderType shader_type;
         u64 uid;
         std::vector<u64> code;
         std::vector<u64> code_b;
         u32 main_offset;
         VideoCommon::Shader::CompilerSettings compiler_settings;
-        VideoCommon::Shader::Registry registry;
+        const VideoCommon::Shader::Registry* registry;
         VAddr cpu_address;
+
+        // For Vulkan
+        Vulkan::VKPipelineCache* pp_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::RenderPassParams renderpass_params;
+        u32 padding;
+        std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders;
+        Vulkan::FixedPipelineState fixed_state;
     };
 
     std::condition_variable cv;

From 4539073ce1d8fd6df03263e826d3805b4909e055 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Thu, 30 Jul 2020 15:41:11 -0400
Subject: [PATCH 2/7] Address feedback. Bruteforce delete duplicates

---
 .../renderer_vulkan/vk_graphics_pipeline.h    |   2 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  16 ++-
 .../renderer_vulkan/vk_pipeline_cache.h       |  19 ++-
 .../renderer_vulkan/vk_rasterizer.cpp         |  18 +--
 .../renderer_vulkan/vk_rasterizer.h           |   2 +-
 src/video_core/shader/async_shaders.cpp       | 133 ++++++++++--------
 src/video_core/shader/async_shaders.h         |   4 +-
 7 files changed, 115 insertions(+), 79 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 39c73a139f..d50bd347c1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -54,7 +54,7 @@ public:
         return renderpass;
     }
 
-    const GraphicsPipelineCacheKey& GetCacheKey() {
+    const GraphicsPipelineCacheKey& GetCacheKey() const {
         return m_key;
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 45d4dcb8c5..a7ce621cae 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -205,20 +205,20 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
-        return *last_graphics_pipeline;
+        return last_graphics_pipeline;
     }
     last_graphics_key = key;
 
     if (device.UseAsynchronousShaders()) {
         auto work = async_shaders.GetCompletedWork();
-        for (std::size_t i = 0; i < work.size(); ++i) {
-            auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey());
-            entry = std::move(work[i].pipeline);
+        for (auto& w : work) {
+            auto& entry = graphics_cache.at(w.pipeline->GetCacheKey());
+            entry = std::move(w.pipeline);
         }
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
@@ -227,7 +227,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
             async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
                                             key.padding, key.shaders, key.fixed_state);
         }
-        return *(last_graphics_pipeline = graphics_cache.at(key).get());
+        last_graphics_pipeline = graphics_cache.at(key).get();
+        return last_graphics_pipeline;
     }
 
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
@@ -239,7 +240,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
     }
-    return *(last_graphics_pipeline = entry.get());
+    last_graphics_pipeline = entry.get();
+    return last_graphics_pipeline;
 }
 
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c70da6da40..404f2b3f78 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -153,31 +153,46 @@ public:
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
                                             VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
-    const VKDevice& GetDevice() {
+    const VKDevice& GetDevice() const {
         return device;
     }
 
     VKScheduler& GetScheduler() {
         return scheduler;
     }
+    const VKScheduler& GetScheduler() const {
+        return scheduler;
+    }
 
     VKDescriptorPool& GetDescriptorPool() {
         return descriptor_pool;
     }
 
+    const VKDescriptorPool& GetDescriptorPool() const {
+        return descriptor_pool;
+    }
+
     VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
         return update_descriptor_queue;
     }
 
+    const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
+        return update_descriptor_queue;
+    }
+
     VKRenderPassCache& GetRenderpassCache() {
         return renderpass_cache;
     }
 
+    const VKRenderPassCache& GetRenderpassCache() const {
+        return renderpass_cache;
+    }
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 6310e898ca..fc1b51a96a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -404,10 +404,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
+        // The following is subject to move into the allocate workers method, to be api agnostic
+
         // Max worker threads we should allow
-        constexpr auto MAX_THREADS = 2u;
+        constexpr u32 MAX_THREADS = 4;
         // Amount of threads we should reserve for other parts of yuzu
-        constexpr auto RESERVED_THREADS = 6u;
+        constexpr u32 RESERVED_THREADS = 6;
         // Get the amount of threads we can use(this can return zero)
         const auto cpu_thread_count =
             std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
@@ -456,16 +458,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
-    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
         system.GPU().TickWork();
         return;
     }
 
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -475,8 +477,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 27604b9a3c..f640ba649d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -287,7 +287,6 @@ private:
     VKMemoryManager& memory_manager;
     StateTracker& state_tracker;
     VKScheduler& scheduler;
-    VideoCommon::Shader::AsyncShaders async_shaders;
 
     VKStagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
@@ -307,6 +306,7 @@ private:
     vk::Buffer default_buffer;
     VKMemoryCommit default_buffer_commit;
     vk::Event wfi_event;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
     View zeta_attachment;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 335a0d05b3..c536b025b8 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -111,20 +111,19 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
-                                                    : AsyncShaders::Backend::OpenGL,
-                        &device,
-                        shader_type,
-                        uid,
-                        std::move(code),
-                        std::move(code_b),
-                        main_offset,
-                        compiler_settings,
-                        &registry,
-                        cpu_addr};
-
+    auto p = std::make_unique<WorkerParams>();
+    p->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
+    p->device = &device;
+    p->shader_type = shader_type;
+    p->uid = uid;
+    p->code = std::move(code);
+    p->code_b = std::move(code_b);
+    p->main_offset = main_offset;
+    p->compiler_settings = compiler_settings;
+    p->registry = &registry;
+    p->cpu_address = cpu_addr;
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -134,19 +133,19 @@ void AsyncShaders::QueueVulkanShader(
     std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
     Vulkan::FixedPipelineState fixed_state) {
 
-    WorkerParams params{
-        .backend = AsyncShaders::Backend::Vulkan,
-        .pp_cache = pp_cache,
-        .bindings = bindings,
-        .program = program,
-        .renderpass_params = renderpass_params,
-        .padding = padding,
-        .shaders = shaders,
-        .fixed_state = fixed_state,
-    };
+    auto p = std::make_unique<WorkerParams>();
+
+    p->backend = Backend::Vulkan;
+    p->pp_cache = pp_cache;
+    p->bindings = bindings;
+    p->program = program;
+    p->renderpass_params = renderpass_params;
+    p->padding = padding;
+    p->shaders = shaders;
+    p->fixed_state = fixed_state;
 
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(p));
     cv.notify_one();
 }
 
@@ -168,64 +167,82 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (pending_queue.empty()) {
             continue;
         }
-        // Pull work from queue
-        WorkerParams work = std::move(pending_queue.front());
-        pending_queue.pop_front();
 
+        // Pull work from queue
+        auto work = std::move(pending_queue.front());
+        pending_queue.pop();
         lock.unlock();
 
-        if (work.backend == AsyncShaders::Backend::OpenGL ||
-            work.backend == AsyncShaders::Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work.registry;
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
+        if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
+            VideoCommon::Shader::Registry registry = *work->registry;
+            const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
+                OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
             Result result{};
-            result.backend = work.backend;
-            result.cpu_address = work.cpu_address;
-            result.uid = work.uid;
-            result.code = std::move(work.code);
-            result.code_b = std::move(work.code_b);
-            result.shader_type = work.shader_type;
+            result.backend = work->backend;
+            result.cpu_address = work->cpu_address;
+            result.uid = work->uid;
+            result.code = std::move(work->code);
+            result.code_b = std::move(work->code_b);
+            result.shader_type = work->shader_type;
+            // LOG_CRITICAL(Render_Vulkan, "Shader hast been Compiled \t0x{:016X} id {}",
+            // result.uid, id);
 
-            if (work.backend == AsyncShaders::Backend::OpenGL) {
+            if (work->backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+            } else if (work->backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
+            work.reset();
 
             {
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
-
-        } else if (work.backend == AsyncShaders::Backend::Vulkan) {
+        } else if (work->backend == Backend::Vulkan) {
             Vulkan::GraphicsPipelineCacheKey params_key{
-                work.renderpass_params,
-                work.padding,
-                work.shaders,
-                work.fixed_state,
+                .renderpass_params = work->renderpass_params,
+                .padding = work->padding,
+                .shaders = work->shaders,
+                .fixed_state = work->fixed_state,
             };
+
+            {
+                std::unique_lock find_lock{completed_mutex};
+                for (size_t i = 0; i < finished_work.size(); ++i) {
+                    // This loop deletes duplicate pipelines in finished_work
+                    // in favor of the pipeline about to be created
+
+                    if (finished_work[i].pipeline &&
+                        finished_work[i].pipeline->GetCacheKey().Hash() == params_key.Hash()) {
+                        LOG_CRITICAL(Render_Vulkan,
+                                     "Pipeliene was already here \t0x{:016X} matches 0x{:016X} ",
+                                     params_key.Hash(),
+                                     finished_work[i].pipeline->GetCacheKey().Hash());
+                        finished_work.erase(finished_work.begin() + i);
+                    }
+                }
+                find_lock.unlock();
+            }
+
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
+                work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
+                work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
+
             {
                 std::unique_lock complete_lock(completed_mutex);
-
-                // Duplicate creation of pipelines leads to instability and crashing, caused by a
-                // race condition but band-aid solution is locking the making of the pipeline
-                // results in only one pipeline created at a time.
                 Result result{
-                    .backend = work.backend,
-                    .pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
-                        work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
-                        work.pp_cache->GetDescriptorPool(),
-                        work.pp_cache->GetUpdateDescriptorQueue(),
-                        work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
-                        work.program),
+                    .backend = Backend::Vulkan,
+                    .pipeline = std::move(pipeline),
                 };
-
                 finished_work.push_back(std::move(result));
+                complete_lock.unlock();
             }
         }
+        // Give a chance for another thread to get work. Lessens duplicates
+        std::this_thread::yield();
     }
 }
 
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 702026ce24..d4eeb8fb6b 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -100,7 +100,7 @@ private:
     bool HasWorkQueued();
 
     struct WorkerParams {
-        AsyncShaders::Backend backend;
+        Backend backend;
         // For OGL
         const OpenGL::Device* device;
         Tegra::Engines::ShaderType shader_type;
@@ -128,7 +128,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::deque<WorkerParams> pending_queue;
+    std::queue<std::unique_ptr<WorkerParams>> pending_queue;
     std::vector<AsyncShaders::Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };

From c02464f64e302b8c9ea0f310e6fd85834d26cca5 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Fri, 31 Jul 2020 17:30:05 -0400
Subject: [PATCH 3/7] Vk Async Worker directly emplace in cache

---
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 18 +++--
 .../renderer_vulkan/vk_pipeline_cache.h       |  3 +
 src/video_core/shader/async_shaders.cpp       | 78 ++++++-------------
 3 files changed, 41 insertions(+), 58 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a7ce621cae..1a8b2c62b6 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -215,11 +215,7 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     last_graphics_key = key;
 
     if (device.UseAsynchronousShaders()) {
-        auto work = async_shaders.GetCompletedWork();
-        for (auto& w : work) {
-            auto& entry = graphics_cache.at(w.pipeline->GetCacheKey());
-            entry = std::move(w.pipeline);
-        }
+        std::unique_lock lock{pipeline_cache};
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
             LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
@@ -296,6 +292,18 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     return *entry;
 }
 
+void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+    std::unique_lock lock{pipeline_cache};
+    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(pipeline->GetCacheKey());
+    auto& entry = pair->second;
+    if (entry) {
+        LOG_INFO(Render_Vulkan, "Pipeline already here 0x{:016X}", pipeline->GetCacheKey().Hash());
+        duplicates.push_back(std::move(pipeline));
+    } else {
+        entry = std::move(pipeline);
+    }
+}
+
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
     bool finished = false;
     const auto Finish = [&] {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 404f2b3f78..777ef2038d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -193,6 +193,8 @@ public:
         return renderpass_cache;
     }
 
+    void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
@@ -216,6 +218,7 @@ private:
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
     std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;
 
+    std::mutex pipeline_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
         graphics_cache;
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index c536b025b8..54a81460b1 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -111,19 +111,19 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    auto p = std::make_unique<WorkerParams>();
-    p->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
-    p->device = &device;
-    p->shader_type = shader_type;
-    p->uid = uid;
-    p->code = std::move(code);
-    p->code_b = std::move(code_b);
-    p->main_offset = main_offset;
-    p->compiler_settings = compiler_settings;
-    p->registry = &registry;
-    p->cpu_address = cpu_addr;
+    auto params = std::make_unique<WorkerParams>();
+    params->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
+    params->device = &device;
+    params->shader_type = shader_type;
+    params->uid = uid;
+    params->code = std::move(code);
+    params->code_b = std::move(code_b);
+    params->main_offset = main_offset;
+    params->compiler_settings = compiler_settings;
+    params->registry = &registry;
+    params->cpu_address = cpu_addr;
     std::unique_lock lock(queue_mutex);
-    pending_queue.push(std::move(p));
+    pending_queue.push(std::move(params));
     cv.notify_one();
 }
 
@@ -133,19 +133,19 @@ void AsyncShaders::QueueVulkanShader(
     std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
     Vulkan::FixedPipelineState fixed_state) {
 
-    auto p = std::make_unique<WorkerParams>();
+    auto params = std::make_unique<WorkerParams>();
 
-    p->backend = Backend::Vulkan;
-    p->pp_cache = pp_cache;
-    p->bindings = bindings;
-    p->program = program;
-    p->renderpass_params = renderpass_params;
-    p->padding = padding;
-    p->shaders = shaders;
-    p->fixed_state = fixed_state;
+    params->backend = Backend::Vulkan;
+    params->pp_cache = pp_cache;
+    params->bindings = bindings;
+    params->program = program;
+    params->renderpass_params = renderpass_params;
+    params->padding = padding;
+    params->shaders = shaders;
+    params->fixed_state = fixed_state;
 
     std::unique_lock lock(queue_mutex);
-    pending_queue.push(std::move(p));
+    pending_queue.push(std::move(params));
     cv.notify_one();
 }
 
@@ -162,7 +162,6 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (!HasWorkQueued()) {
             continue;
         }
-
         // Another thread beat us, just unlock and wait for the next load
         if (pending_queue.empty()) {
             continue;
@@ -186,8 +185,6 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
             result.code = std::move(work->code);
             result.code_b = std::move(work->code_b);
             result.shader_type = work->shader_type;
-            // LOG_CRITICAL(Render_Vulkan, "Shader hast been Compiled \t0x{:016X} id {}",
-            // result.uid, id);
 
             if (work->backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
@@ -208,40 +205,15 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
                 .fixed_state = work->fixed_state,
             };
 
-            {
-                std::unique_lock find_lock{completed_mutex};
-                for (size_t i = 0; i < finished_work.size(); ++i) {
-                    // This loop deletes duplicate pipelines in finished_work
-                    // in favor of the pipeline about to be created
-
-                    if (finished_work[i].pipeline &&
-                        finished_work[i].pipeline->GetCacheKey().Hash() == params_key.Hash()) {
-                        LOG_CRITICAL(Render_Vulkan,
-                                     "Pipeliene was already here \t0x{:016X} matches 0x{:016X} ",
-                                     params_key.Hash(),
-                                     finished_work[i].pipeline->GetCacheKey().Hash());
-                        finished_work.erase(finished_work.begin() + i);
-                    }
-                }
-                find_lock.unlock();
-            }
-
             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                 work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
                 work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
                 work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
 
-            {
-                std::unique_lock complete_lock(completed_mutex);
-                Result result{
-                    .backend = Backend::Vulkan,
-                    .pipeline = std::move(pipeline),
-                };
-                finished_work.push_back(std::move(result));
-                complete_lock.unlock();
-            }
+            work->pp_cache->EmplacePipeline(std::move(pipeline));
+            work.reset();
         }
-        // Give a chance for another thread to get work. Lessens duplicates
+        // Give a chance for another thread to get work.
         std::this_thread::yield();
     }
 }

From 31a76410e8fa09462d960c10148c075125dc385a Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Sun, 2 Aug 2020 13:05:41 -0400
Subject: [PATCH 4/7] Address feedback, add shader compile notifier, update
 setting text

---
 src/video_core/renderer_vulkan/vk_device.h    |   5 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp  |   9 +-
 .../renderer_vulkan/vk_graphics_pipeline.h    |  30 ++++-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  23 ++--
 .../renderer_vulkan/vk_pipeline_cache.h       |  58 ----------
 .../renderer_vulkan/vk_rasterizer.cpp         |  19 ++--
 src/video_core/shader/async_shaders.cpp       | 106 +++++++++---------
 src/video_core/shader/async_shaders.h         |  23 ++--
 .../configure_graphics_advanced.ui            |   2 +-
 9 files changed, 115 insertions(+), 160 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 30cd3e1897..26a233db18 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,7 @@ public:
         return reported_extensions;
     }
 
+    /// Returns true if the setting for async shader compilation is enabled.
     bool UseAsynchronousShaders() const {
         return use_asynchronous_shaders;
     }
@@ -255,7 +256,9 @@ private:
     bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
-    bool use_asynchronous_shaders{};
+
+    // Asynchronous Graphics Pipeline setting
+    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
 
     // Telemetry parameters
     std::string vendor_name;                      ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 7d51b98362..5dc4cd5af3 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,14 +78,15 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        const GraphicsPipelineCacheKey& key,
                                        vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
-    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+    : device{device}, scheduler{scheduler}, hash{key.Hash()}, cache_key{key},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
       descriptor_allocator{descriptor_pool, *descriptor_set_layout},
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)},
-      pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {}
+      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
+                                                                             key.renderpass_params,
+                                                                             program)} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
@@ -180,7 +181,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                 const SPIRVProgram& program) const {
-    const auto& state = fixed_state;
+    const auto& state = cache_key.fixed_state;
     const auto& viewport_swizzles = state.viewport_swizzles;
 
     FixedPipelineState::DynamicState dynamic;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index d50bd347c1..9d462db0a3 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+    RenderPassParams renderpass_params;
+    u32 padding;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    FixedPipelineState fixed_state;
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    std::size_t Size() const noexcept {
+        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
 class VKDescriptorPool;
 class VKDevice;
@@ -54,8 +74,8 @@ public:
         return renderpass;
     }
 
-    const GraphicsPipelineCacheKey& GetCacheKey() const {
-        return m_key;
+    GraphicsPipelineCacheKey GetCacheKey() const {
+        return cache_key;
     }
 
 private:
@@ -74,8 +94,8 @@ private:
 
     const VKDevice& device;
     VKScheduler& scheduler;
-    const FixedPipelineState fixed_state;
     const u64 hash;
+    GraphicsPipelineCacheKey cache_key;
 
     vk::DescriptorSetLayout descriptor_set_layout;
     DescriptorAllocator descriptor_allocator;
@@ -86,8 +106,6 @@ private:
 
     VkRenderPass renderpass;
     vk::Pipeline pipeline;
-
-    const GraphicsPipelineCacheKey& m_key;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1a8b2c62b6..20ffbeb388 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace Vulkan {
 
@@ -214,27 +215,31 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     }
     last_graphics_key = key;
 
-    if (device.UseAsynchronousShaders()) {
+    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
         std::unique_lock lock{pipeline_cache};
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
+            system.GPU().ShaderNotify().MarkSharderBuilding();
             LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
             const auto [program, bindings] = DecompileShaders(key.fixed_state);
-            async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
-                                            key.padding, key.shaders, key.fixed_state);
+            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+                                            update_descriptor_queue, renderpass_cache, bindings,
+                                            program, key);
         }
-        last_graphics_pipeline = graphics_cache.at(key).get();
+        last_graphics_pipeline = pair->second.get();
         return last_graphics_pipeline;
     }
 
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
     auto& entry = pair->second;
     if (is_cache_miss) {
+        system.GPU().ShaderNotify().MarkSharderBuilding();
         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
         const auto [program, bindings] = DecompileShaders(key.fixed_state);
         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
+        system.GPU().ShaderNotify().MarkShaderComplete();
     }
     last_graphics_pipeline = entry.get();
     return last_graphics_pipeline;
@@ -294,14 +299,8 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 
 void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
     std::unique_lock lock{pipeline_cache};
-    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(pipeline->GetCacheKey());
-    auto& entry = pair->second;
-    if (entry) {
-        LOG_INFO(Render_Vulkan, "Pipeline already here 0x{:016X}", pipeline->GetCacheKey().Hash());
-        duplicates.push_back(std::move(pipeline));
-    } else {
-        entry = std::move(pipeline);
-    }
+    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+    system.GPU().ShaderNotify().MarkShaderComplete();
 }
 
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 777ef2038d..c04829e77f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -44,28 +44,6 @@ class VKUpdateDescriptorQueue;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey {
-    RenderPassParams renderpass_params;
-    u32 padding;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    FixedPipelineState fixed_state;
-
-    std::size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    std::size_t Size() const noexcept {
-        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
 struct ComputePipelineCacheKey {
     GPUVAddr shader;
     u32 shared_memory_size;
@@ -158,41 +136,6 @@ public:
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
-    const VKDevice& GetDevice() const {
-        return device;
-    }
-
-    VKScheduler& GetScheduler() {
-        return scheduler;
-    }
-    const VKScheduler& GetScheduler() const {
-        return scheduler;
-    }
-
-    VKDescriptorPool& GetDescriptorPool() {
-        return descriptor_pool;
-    }
-
-    const VKDescriptorPool& GetDescriptorPool() const {
-        return descriptor_pool;
-    }
-
-    VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
-        return update_descriptor_queue;
-    }
-
-    const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
-        return update_descriptor_queue;
-    }
-
-    VKRenderPassCache& GetRenderpassCache() {
-        return renderpass_cache;
-    }
-
-    const VKRenderPassCache& GetRenderpassCache() const {
-        return renderpass_cache;
-    }
-
     void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
 
 protected:
@@ -216,7 +159,6 @@ private:
 
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
-    std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;
 
     std::mutex pipeline_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fc1b51a96a..720802ad50 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
@@ -408,15 +409,10 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 
         // Max worker threads we should allow
         constexpr u32 MAX_THREADS = 4;
-        // Amount of threads we should reserve for other parts of yuzu
-        constexpr u32 RESERVED_THREADS = 6;
-        // Get the amount of threads we can use(this can return zero)
-        const auto cpu_thread_count =
-            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
-        // Deduce how many "extra" threads we have to use.
-        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Deduce how many threads we can use
+        const auto threads_used = std::thread::hardware_concurrency() / 4;
         // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1u, max_threads_unused);
+        const auto max_worker_count = std::max(1U, threads_used);
         // Don't use more than MAX_THREADS
         const auto worker_count = std::min(max_worker_count, MAX_THREADS);
         async_shaders.AllocateWorkers(worker_count);
@@ -432,6 +428,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
+    SCOPE_EXIT({ system.GPU().TickWork(); });
+
     const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@@ -458,10 +456,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
-        system.GPU().TickWork();
         return;
     }
 
@@ -488,8 +485,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 54a81460b1..ea813d506a 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <chrono>
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -111,38 +110,44 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    auto params = std::make_unique<WorkerParams>();
-    params->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
-    params->device = &device;
-    params->shader_type = shader_type;
-    params->uid = uid;
-    params->code = std::move(code);
-    params->code_b = std::move(code_b);
-    params->main_offset = main_offset;
-    params->compiler_settings = compiler_settings;
-    params->registry = &registry;
-    params->cpu_address = cpu_addr;
+    WorkerParams params{
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device,
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = &registry,
+        .cpu_address = cpu_addr,
+    };
     std::unique_lock lock(queue_mutex);
     pending_queue.push(std::move(params));
     cv.notify_one();
 }
 
-void AsyncShaders::QueueVulkanShader(
-    Vulkan::VKPipelineCache* pp_cache, std::vector<VkDescriptorSetLayoutBinding> bindings,
-    Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, u32 padding,
-    std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
-    Vulkan::FixedPipelineState fixed_state) {
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
 
-    auto params = std::make_unique<WorkerParams>();
-
-    params->backend = Backend::Vulkan;
-    params->pp_cache = pp_cache;
-    params->bindings = bindings;
-    params->program = program;
-    params->renderpass_params = renderpass_params;
-    params->padding = padding;
-    params->shaders = shaders;
-    params->fixed_state = fixed_state;
+    WorkerParams params{
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = bindings,
+        .program = program,
+        .key = key,
+    };
 
     std::unique_lock lock(queue_mutex);
     pending_queue.push(std::move(params));
@@ -150,7 +155,6 @@ void AsyncShaders::QueueVulkanShader(
 }
 
 void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
-    using namespace std::chrono_literals;
     while (!is_thread_exiting.load(std::memory_order_relaxed)) {
         std::unique_lock lock{queue_mutex};
         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@@ -168,53 +172,43 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         }
 
         // Pull work from queue
-        auto work = std::move(pending_queue.front());
+        WorkerParams work = std::move(pending_queue.front());
         pending_queue.pop();
         lock.unlock();
 
-        if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work->registry;
-            const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            VideoCommon::Shader::Registry registry = *work.registry;
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
             Result result{};
-            result.backend = work->backend;
-            result.cpu_address = work->cpu_address;
-            result.uid = work->uid;
-            result.code = std::move(work->code);
-            result.code_b = std::move(work->code_b);
-            result.shader_type = work->shader_type;
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
 
-            if (work->backend == Backend::OpenGL) {
+            if (work.backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work->backend == Backend::GLASM) {
+            } else if (work.backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
-            work.reset();
 
             {
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
-        } else if (work->backend == Backend::Vulkan) {
-            Vulkan::GraphicsPipelineCacheKey params_key{
-                .renderpass_params = work->renderpass_params,
-                .padding = work->padding,
-                .shaders = work->shaders,
-                .fixed_state = work->fixed_state,
-            };
+        } else if (work.backend == Backend::Vulkan) {
 
             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
-                work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
-                work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
-                work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
 
-            work->pp_cache->EmplacePipeline(std::move(pipeline));
-            work.reset();
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
         }
-        // Give a chance for another thread to get work.
-        std::this_thread::yield();
     }
 }
 
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index d4eeb8fb6b..7c10bd63f9 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -86,12 +86,13 @@ public:
                            VideoCommon::Shader::CompilerSettings compiler_settings,
                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 
-    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler,
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
                            std::vector<VkDescriptorSetLayoutBinding> bindings,
-                           Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params,
-                           u32 padding,
-                           std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
-                           Vulkan::FixedPipelineState fixed_state);
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
 
 private:
     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@@ -114,12 +115,14 @@ private:
 
         // For Vulkan
         Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
         std::vector<VkDescriptorSetLayoutBinding> bindings;
         Vulkan::SPIRVProgram program;
-        Vulkan::RenderPassParams renderpass_params;
-        u32 padding;
-        std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders;
-        Vulkan::FixedPipelineState fixed_state;
+        Vulkan::GraphicsPipelineCacheKey key;
     };
 
     std::condition_variable cv;
@@ -128,7 +131,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::queue<std::unique_ptr<WorkerParams>> pending_queue;
+    std::queue<WorkerParams> pending_queue;
     std::vector<AsyncShaders::Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index a793c803de..846a30586d 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -92,7 +92,7 @@
            <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
           </property>
           <property name="text">
-           <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string>
+           <string>Use asynchronous shader building (experimental)</string>
           </property>
          </widget>
         </item>

From 1b829fbd7a36f9c2b553b04aa39bdf8135d30458 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Wed, 5 Aug 2020 12:53:26 -0400
Subject: [PATCH 5/7] move thread 1/4 count computation into allocate workers
 method

---
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +---------
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 12 +-----------
 src/video_core/shader/async_shaders.cpp          | 13 +++++++++++--
 src/video_core/shader/async_shaders.h            |  2 +-
 4 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cb284db772..4af5824cdd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     }
 
     if (device.UseAsynchronousShaders()) {
-        // Max worker threads we should allow
-        constexpr u32 MAX_THREADS = 4;
-        // Deduce how many threads we can use
-        const u32 threads_used = std::thread::hardware_concurrency() / 4;
-        // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1U, threads_used);
-        // Don't use more than MAX_THREADS
-        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
-        async_shaders.AllocateWorkers(worker_count);
+        async_shaders.AllocateWorkers();
     }
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 720802ad50..936f761958 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -405,17 +405,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
-        // The following is subject to move into the allocate workers method, to be api agnostic
-
-        // Max worker threads we should allow
-        constexpr u32 MAX_THREADS = 4;
-        // Deduce how many threads we can use
-        const auto threads_used = std::thread::hardware_concurrency() / 4;
-        // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1U, threads_used);
-        // Don't use more than MAX_THREADS
-        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
-        async_shaders.AllocateWorkers(worker_count);
+        async_shaders.AllocateWorkers();
     }
 }
 
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index ea813d506a..6a1b8999c9 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -19,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
     KillWorkers();
 }
 
-void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
+void AsyncShaders::AllocateWorkers() {
+    // Max worker threads we should allow
+    constexpr u32 MAX_THREADS = 4;
+    // Deduce how many threads we can use
+    const u32 threads_used = std::thread::hardware_concurrency() / 4;
+    // Always allow at least 1 thread regardless of our settings
+    const auto max_worker_count = std::max(1U, threads_used);
+    // Don't use more than MAX_THREADS
+    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
     // If we're already have workers queued or don't want to queue workers, ignore
-    if (num_workers == worker_threads.size() || num_workers == 0) {
+    if (num_workers == worker_threads.size()) {
         return;
     }
 
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 7c10bd63f9..5b58dd9bdf 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -62,7 +62,7 @@ public:
     ~AsyncShaders();
 
     /// Start up shader worker threads
-    void AllocateWorkers(std::size_t num_workers);
+    void AllocateWorkers();
 
     /// Clear the shader queue and kill all worker threads
     void FreeWorkers();

From f49ffdd6488e74591ed66d911c13c3684762d425 Mon Sep 17 00:00:00 2001
From: Ameer J <52414509+ameerj@users.noreply.github.com>
Date: Wed, 5 Aug 2020 16:41:22 -0400
Subject: [PATCH 6/7] Morph: Update worker allocation comment

Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com>
---
 src/video_core/shader/async_shaders.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 6a1b8999c9..91d1b6bbda 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -29,7 +29,7 @@ void AsyncShaders::AllocateWorkers() {
     // Don't use more than MAX_THREADS
     const auto num_workers = std::min(max_worker_count, MAX_THREADS);
 
-    // If we're already have workers queued or don't want to queue workers, ignore
+    // If we already have workers queued, ignore
     if (num_workers == worker_threads.size()) {
         return;
     }

From fde8102a415c546e88346258bf42de2a248113b1 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Sun, 16 Aug 2020 16:33:21 -0400
Subject: [PATCH 7/7] Remove unneeded newlines, optional Registry in shader
 params

Addressing feedback from Rodrigo
---
 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++----
 src/video_core/renderer_vulkan/vk_graphics_pipeline.h   | 2 +-
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp    | 2 +-
 src/video_core/shader/async_shaders.cpp                 | 9 +++------
 src/video_core/shader/async_shaders.h                   | 3 +--
 5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 5dc4cd5af3..2e46c62785 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        const GraphicsPipelineCacheKey& key,
                                        vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
-    : device{device}, scheduler{scheduler}, hash{key.Hash()}, cache_key{key},
+    : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
       descriptor_allocator{descriptor_pool, *descriptor_set_layout},
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
-                                                                             key.renderpass_params,
-                                                                             program)} {}
+      renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
+      pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 9d462db0a3..58aa35efdd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -94,8 +94,8 @@ private:
 
     const VKDevice& device;
     VKScheduler& scheduler;
+    const GraphicsPipelineCacheKey cache_key;
     const u64 hash;
-    GraphicsPipelineCacheKey cache_key;
 
     vk::DescriptorSetLayout descriptor_set_layout;
     DescriptorAllocator descriptor_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 20ffbeb388..cfdcdd6ab5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -298,9 +298,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 }
 
 void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+    system.GPU().ShaderNotify().MarkShaderComplete();
     std::unique_lock lock{pipeline_cache};
     graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
-    system.GPU().ShaderNotify().MarkShaderComplete();
 }
 
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 91d1b6bbda..6c19eaf075 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -128,7 +128,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
         .code_b = std::move(code_b),
         .main_offset = main_offset,
         .compiler_settings = compiler_settings,
-        .registry = &registry,
+        .registry = registry,
         .cpu_address = cpu_addr,
     };
     std::unique_lock lock(queue_mutex);
@@ -144,7 +144,6 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                      std::vector<VkDescriptorSetLayoutBinding> bindings,
                                      Vulkan::SPIRVProgram program,
                                      Vulkan::GraphicsPipelineCacheKey key) {
-
     WorkerParams params{
         .backend = Backend::Vulkan,
         .pp_cache = pp_cache,
@@ -186,11 +185,10 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         lock.unlock();
 
         if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work.registry;
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
             Result result{};
             result.backend = work.backend;
             result.cpu_address = work.cpu_address;
@@ -210,7 +208,6 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
                 finished_work.push_back(std::move(result));
             }
         } else if (work.backend == Backend::Vulkan) {
-
             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                 *work.vk_device, *work.scheduler, *work.descriptor_pool,
                 *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 5b58dd9bdf..d5ae814d53 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -55,7 +55,6 @@ public:
         std::vector<u64> code;
         std::vector<u64> code_b;
         Tegra::Engines::ShaderType shader_type;
-        std::unique_ptr<Vulkan::VKGraphicsPipeline> pipeline;
     };
 
     explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
@@ -110,7 +109,7 @@ private:
         std::vector<u64> code_b;
         u32 main_offset;
         VideoCommon::Shader::CompilerSettings compiler_settings;
-        const VideoCommon::Shader::Registry* registry;
+        std::optional<VideoCommon::Shader::Registry> registry;
         VAddr cpu_address;
 
         // For Vulkan