From 702ed212075e26378181d21e07e693ce078b8dee Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 26 Dec 2020 23:22:24 +1000 Subject: [PATCH] HostDisplay: Add threaded presentation for Vulkan renderer Can add a significant speedup for fast forward. Automatically disabled when vsync is enabled. --- .../app/src/cpp/android_host_interface.cpp | 5 +- android/app/src/main/res/values/strings.xml | 2 + .../src/main/res/xml/advanced_preferences.xml | 6 + src/common/vulkan/context.cpp | 140 ++++++++++++++---- src/common/vulkan/context.h | 36 ++++- src/core/host_display.h | 6 +- src/core/host_interface.cpp | 4 +- src/core/settings.cpp | 2 + src/core/settings.h | 1 + .../libretro_host_display.cpp | 6 +- .../libretro_host_display.h | 6 +- .../libretro_host_interface.cpp | 4 +- .../libretro_opengl_host_display.cpp | 2 +- .../libretro_opengl_host_display.h | 5 +- .../libretro_vulkan_host_display.cpp | 2 +- .../libretro_vulkan_host_display.h | 3 +- src/duckstation-qt/displaysettingswidget.cpp | 10 +- src/duckstation-qt/displaysettingswidget.ui | 7 + src/duckstation-qt/mainwindow.cpp | 15 +- src/duckstation-qt/mainwindow.h | 3 +- src/duckstation-qt/qthostinterface.cpp | 7 +- src/duckstation-qt/qthostinterface.h | 3 +- src/duckstation-sdl/sdl_host_interface.cpp | 6 +- src/frontend-common/d3d11_host_display.cpp | 6 +- src/frontend-common/d3d11_host_display.h | 4 +- src/frontend-common/opengl_host_display.cpp | 6 +- src/frontend-common/opengl_host_display.h | 4 +- src/frontend-common/vulkan_host_display.cpp | 10 +- src/frontend-common/vulkan_host_display.h | 6 +- 29 files changed, 241 insertions(+), 76 deletions(-) diff --git a/android/app/src/cpp/android_host_interface.cpp b/android/app/src/cpp/android_host_interface.cpp index 6fd0796c5..b9a7b5155 100644 --- a/android/app/src/cpp/android_host_interface.cpp +++ b/android/app/src/cpp/android_host_interface.cpp @@ -459,8 +459,9 @@ bool AndroidHostInterface::AcquireHostDisplay() break; } - if (!display->CreateRenderDevice(wi, 
{}, g_settings.gpu_use_debug_device) || - !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device)) + if (!display->CreateRenderDevice(wi, {}, g_settings.gpu_use_debug_device, g_settings.gpu_threaded_presentation) || + !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device, + g_settings.gpu_threaded_presentation)) { ReportError("Failed to acquire host display."); display->DestroyRenderDevice(); diff --git a/android/app/src/main/res/values/strings.xml b/android/app/src/main/res/values/strings.xml index 1f745e58b..17723f2fb 100644 --- a/android/app/src/main/res/values/strings.xml +++ b/android/app/src/main/res/values/strings.xml @@ -153,6 +153,8 @@ Copy Threaded GPU Rendering Uses a second thread for drawing graphics. Currently only available for the software renderer, but can provide a significant speed improvement, and is safe to use. + Threaded GPU Presentation + Presents frames on a background thread when fast forwarding or vsync is disabled. This can measurably improve performance in the Vulkan renderer. 
Language (restart to apply) Stop Editing Reset Layout diff --git a/android/app/src/main/res/xml/advanced_preferences.xml b/android/app/src/main/res/xml/advanced_preferences.xml index 034b2b425..ad5fd714b 100644 --- a/android/app/src/main/res/xml/advanced_preferences.xml +++ b/android/app/src/main/res/xml/advanced_preferences.xml @@ -49,6 +49,12 @@ app:defaultValue="true" app:summary="@string/settings_summary_gpu_thread" app:iconSpaceReserved="false" /> + * out_swap_chain, - bool enable_debug_reports, bool enable_validation_layer) + bool threaded_presentation, bool enable_debug_reports, bool enable_validation_layer) { AssertMsg(!g_vulkan_context, "Has no current context"); @@ -374,6 +376,9 @@ bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::uniqu return false; } + if (threaded_presentation) + g_vulkan_context->StartPresentThread(); + return true; } @@ -810,6 +815,7 @@ void Context::WaitForFenceCounter(u64 fence_counter) void Context::WaitForGPUIdle() { + WaitForPresentComplete(); vkDeviceWaitIdle(m_device); } @@ -843,8 +849,10 @@ void Context::WaitForCommandBufferCompletion(u32 index) m_completed_fence_counter = now_completed_counter; } -void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, - VkSwapchainKHR present_swap_chain, uint32_t present_image_index) +void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore /* = VK_NULL_HANDLE */, + VkSemaphore signal_semaphore /* = VK_NULL_HANDLE */, + VkSwapchainKHR present_swap_chain /* = VK_NULL_HANDLE */, + uint32_t present_image_index /* = 0xFFFFFFFF */, bool submit_on_thread /* = false */) { FrameResources& resources = m_frame_resources[m_current_frame]; @@ -859,7 +867,30 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal // This command buffer now has commands, so can't be re-used without waiting. 
resources.needs_fence_wait = true; - // This may be executed on the worker thread, so don't modify any state of the manager class. + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); + + if (!submit_on_thread || !m_present_thread.joinable()) + { + DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore); + if (present_swap_chain != VK_NULL_HANDLE) + DoPresent(signal_semaphore, present_swap_chain, present_image_index); + return; + } + + m_queued_present.command_buffer_index = m_current_frame; + m_queued_present.present_swap_chain = present_swap_chain; + m_queued_present.present_image_index = present_image_index; + m_queued_present.wait_semaphore = wait_semaphore; + m_queued_present.signal_semaphore = signal_semaphore; + m_present_done.store(false); + m_present_queued_cv.notify_one(); +} + +void Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore) +{ + FrameResources& resources = m_frame_resources[index]; + uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, &wait_bits, 1u, &resources.command_buffer, 0, nullptr}; @@ -876,39 +907,93 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal submit_info.pSignalSemaphores = &signal_semaphore; } - res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence); + VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); Panic("Failed to submit command buffer."); } +} - // Do we have a swap chain to present? - if (present_swap_chain != VK_NULL_HANDLE) +void Context::DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index) +{ + // Should have a signal semaphore. 
+ Assert(wait_semaphore != VK_NULL_HANDLE); + VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + nullptr, + 1, + &wait_semaphore, + 1, + &present_swap_chain, + &present_image_index, + nullptr}; + + VkResult res = vkQueuePresentKHR(m_present_queue, &present_info); + if (res != VK_SUCCESS) { - // Should have a signal semaphore. - Assert(signal_semaphore != VK_NULL_HANDLE); - VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - nullptr, - 1, - &signal_semaphore, - 1, - &present_swap_chain, - &present_image_index, - nullptr}; + // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. + if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) + LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); - res = vkQueuePresentKHR(m_present_queue, &present_info); - if (res != VK_SUCCESS) - { - // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. - if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) - LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); - - m_last_present_failed = true; - } + m_last_present_failed.store(true); } } +void Context::WaitForPresentComplete() +{ + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); +} + +void Context::WaitForPresentComplete(std::unique_lock<std::mutex>& lock) +{ + if (m_present_done.load()) + return; + + m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); }); +} + +void Context::PresentThread() +{ + std::unique_lock lock(m_present_mutex); + while (!m_present_thread_done.load()) + { + m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); }); + + if (m_present_done.load()) + continue; + + DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.wait_semaphore, + m_queued_present.signal_semaphore); + DoPresent(m_queued_present.signal_semaphore, m_queued_present.present_swap_chain, 
m_queued_present.present_image_index); + m_present_done.store(true); + m_present_done_cv.notify_one(); + } +} + +void Context::StartPresentThread() +{ + Assert(!m_present_thread.joinable()); + m_present_thread_done.store(false); + m_present_thread = std::thread(&Context::PresentThread, this); +} + +void Context::StopPresentThread() +{ + if (!m_present_thread.joinable()) + return; + + { + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); + m_present_thread_done.store(true); + m_present_queued_cv.notify_one(); + } + + m_present_thread.join(); +} + void Context::MoveToNextCommandBuffer() { ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); @@ -918,6 +1003,9 @@ void Context::ActivateCommandBuffer(u32 index) { FrameResources& resources = m_frame_resources[index]; + if (!m_present_done.load() && m_queued_present.command_buffer_index == index) + WaitForPresentComplete(); + // Wait for the GPU to finish with all resources for this command buffer. if (resources.fence_counter > m_completed_fence_counter) WaitForCommandBufferCompletion(index); diff --git a/src/common/vulkan/context.h b/src/common/vulkan/context.h index 6fec69044..48b3a76c3 100644 --- a/src/common/vulkan/context.h +++ b/src/common/vulkan/context.h @@ -8,10 +8,14 @@ #include "../types.h" #include "vulkan_loader.h" #include +#include +#include #include #include #include +#include #include +#include #include struct WindowInfo; @@ -44,7 +48,7 @@ public: // Creates a new context and sets it up as global. static bool Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr* out_swap_chain, - bool enable_debug_reports, bool enable_validation_layer); + bool threaded_presentation, bool enable_debug_reports, bool enable_validation_layer); // Creates a new context from a pre-existing instance. 
static bool CreateFromExistingInstance(VkInstance instance, VkPhysicalDevice gpu, VkSurfaceKHR surface, @@ -147,10 +151,11 @@ public: void SubmitCommandBuffer(VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkSemaphore signal_semaphore = VK_NULL_HANDLE, VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, - uint32_t present_image_index = 0xFFFFFFFF); + uint32_t present_image_index = 0xFFFFFFFF, bool submit_on_thread = false); void MoveToNextCommandBuffer(); void ExecuteCommandBuffer(bool wait_for_completion); + void WaitForPresentComplete(); // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. bool CheckLastPresentFail(); @@ -191,6 +196,13 @@ private: void ActivateCommandBuffer(u32 index); void WaitForCommandBufferCompletion(u32 index); + void DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore); + void DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index); + void WaitForPresentComplete(std::unique_lock<std::mutex>& lock); + void PresentThread(); + void StartPresentThread(); + void StopPresentThread(); + struct FrameResources { // [0] - Init (upload) command buffer, [1] - draw command buffer @@ -223,7 +235,25 @@ private: u32 m_current_frame; bool m_owns_device = false; - bool m_last_present_failed = false; + + std::atomic_bool m_last_present_failed{false}; + std::atomic_bool m_present_done{true}; + std::mutex m_present_mutex; + std::condition_variable m_present_queued_cv; + std::condition_variable m_present_done_cv; + std::thread m_present_thread; + std::atomic_bool m_present_thread_done{false}; + + struct QueuedPresent + { + VkSemaphore wait_semaphore; + VkSemaphore signal_semaphore; + VkSwapchainKHR present_swap_chain; + u32 command_buffer_index; + u32 present_image_index; + }; + + QueuedPresent m_queued_present = {}; // Render pass cache using RenderPassCacheKey = std::tuple; diff --git a/src/core/host_display.h b/src/core/host_display.h index 
a0f0ff18c..f141050a1 100644 --- a/src/core/host_display.h +++ b/src/core/host_display.h @@ -71,8 +71,10 @@ public: virtual bool HasRenderDevice() const = 0; virtual bool HasRenderSurface() const = 0; - virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) = 0; - virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) = 0; + virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) = 0; + virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) = 0; virtual bool MakeRenderContextCurrent() = 0; virtual bool DoneRenderContextCurrent() = 0; virtual void DestroyRenderDevice() = 0; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 2c00af578..aec841acb 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -474,6 +474,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetBoolValue("GPU", "UseDebugDevice", false); si.SetBoolValue("GPU", "PerSampleShading", false); si.SetBoolValue("GPU", "UseThread", true); + si.SetBoolValue("GPU", "ThreadedPresentation", true); si.SetBoolValue("GPU", "TrueColor", false); si.SetBoolValue("GPU", "ScaledDithering", true); si.SetStringValue("GPU", "TextureFilter", Settings::GetTextureFilterName(Settings::DEFAULT_GPU_TEXTURE_FILTER)); @@ -613,7 +614,8 @@ void HostInterface::SaveSettings(SettingsInterface& si) void HostInterface::CheckForSettingsChanges(const Settings& old_settings) { if (System::IsValid() && (g_settings.gpu_renderer != old_settings.gpu_renderer || - g_settings.gpu_use_debug_device != old_settings.gpu_use_debug_device)) + g_settings.gpu_use_debug_device != old_settings.gpu_use_debug_device || + g_settings.gpu_threaded_presentation != old_settings.gpu_threaded_presentation)) { AddFormattedOSDMessage(5.0f, 
TranslateString("OSDMessage", "Switching to %s%s GPU renderer."), Settings::GetRendererName(g_settings.gpu_renderer), diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 178160791..a73ea9246 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -143,6 +143,7 @@ void Settings::Load(SettingsInterface& si) gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); + gpu_threaded_presentation = si.GetBoolValue("GPU", "ThreadedPresentation", true); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); gpu_texture_filter = @@ -299,6 +300,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device); si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); si.SetBoolValue("GPU", "UseThread", gpu_use_thread); + si.SetBoolValue("GPU", "ThreadedPresentation", gpu_threaded_presentation); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering); si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter)); diff --git a/src/core/settings.h b/src/core/settings.h index eec5b40e2..a4b929bba 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -97,6 +97,7 @@ struct Settings u32 gpu_resolution_scale = 1; u32 gpu_multisamples = 1; bool gpu_use_thread = true; + bool gpu_threaded_presentation = true; bool gpu_use_debug_device = false; bool gpu_per_sample_shading = false; bool gpu_true_color = true; diff --git a/src/duckstation-libretro/libretro_host_display.cpp b/src/duckstation-libretro/libretro_host_display.cpp index 0adbb61fe..b4bf7bf29 100644 --- a/src/duckstation-libretro/libretro_host_display.cpp +++ b/src/duckstation-libretro/libretro_host_display.cpp @@ -81,13 +81,15 @@ bool 
LibretroHostDisplay::HasRenderSurface() const return true; } -bool LibretroHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) +bool LibretroHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) { m_window_info = wi; return true; } -bool LibretroHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) +bool LibretroHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) { return true; } diff --git a/src/duckstation-libretro/libretro_host_display.h b/src/duckstation-libretro/libretro_host_display.h index fa0def33e..ec6778d89 100644 --- a/src/duckstation-libretro/libretro_host_display.h +++ b/src/duckstation-libretro/libretro_host_display.h @@ -15,8 +15,10 @@ public: bool HasRenderDevice() const override; bool HasRenderSurface() const override; - bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; - bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override; + bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) override; + bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) override; void DestroyRenderDevice() override; bool MakeRenderContextCurrent() override; diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index ead66ebfd..60de1e0aa 100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -1297,8 +1297,8 @@ void LibretroHostInterface::SwitchToHardwareRenderer() Log_ErrorPrintf("Unhandled renderer '%s'", Settings::GetRendererName(renderer.value())); return; } - 
if (!display || !display->CreateRenderDevice(wi, {}, g_settings.gpu_use_debug_device) || - !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device)) + if (!display || !display->CreateRenderDevice(wi, {}, g_settings.gpu_use_debug_device, false) || + !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device, false)) { Log_ErrorPrintf("Failed to create hardware host display"); return; diff --git a/src/duckstation-libretro/libretro_opengl_host_display.cpp b/src/duckstation-libretro/libretro_opengl_host_display.cpp index 6415b1589..ffe4da213 100644 --- a/src/duckstation-libretro/libretro_opengl_host_display.cpp +++ b/src/duckstation-libretro/libretro_opengl_host_display.cpp @@ -98,7 +98,7 @@ bool LibretroOpenGLHostDisplay::RequestHardwareRendererContext(retro_hw_render_c } bool LibretroOpenGLHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, - bool debug_device) + bool debug_device, bool threaded_presentation) { Assert(wi.type == WindowInfo::Type::Libretro); diff --git a/src/duckstation-libretro/libretro_opengl_host_display.h b/src/duckstation-libretro/libretro_opengl_host_display.h index 3049248cb..9bd74f37d 100644 --- a/src/duckstation-libretro/libretro_opengl_host_display.h +++ b/src/duckstation-libretro/libretro_opengl_host_display.h @@ -4,8 +4,8 @@ #include "core/host_display.h" #include "frontend-common/opengl_host_display.h" #include "libretro.h" -#include #include +#include class LibretroOpenGLHostDisplay final : public FrontendCommon::OpenGLHostDisplay { @@ -17,7 +17,8 @@ public: RenderAPI GetRenderAPI() const override; - bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; + bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) override; void DestroyRenderDevice() override; void ResizeRenderWindow(s32 new_window_width, s32 
new_window_height) override; diff --git a/src/duckstation-libretro/libretro_vulkan_host_display.cpp b/src/duckstation-libretro/libretro_vulkan_host_display.cpp index 03697f30c..5668eaabe 100644 --- a/src/duckstation-libretro/libretro_vulkan_host_display.cpp +++ b/src/duckstation-libretro/libretro_vulkan_host_display.cpp @@ -85,7 +85,7 @@ bool LibretroVulkanHostDisplay::RequestHardwareRendererContext(retro_hw_render_c } bool LibretroVulkanHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, - bool debug_device) + bool debug_device, bool threaded_presentation) { retro_hw_render_interface* ri = nullptr; if (!g_retro_environment_callback(RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE, &ri)) diff --git a/src/duckstation-libretro/libretro_vulkan_host_display.h b/src/duckstation-libretro/libretro_vulkan_host_display.h index 5a55fc121..afea255fe 100644 --- a/src/duckstation-libretro/libretro_vulkan_host_display.h +++ b/src/duckstation-libretro/libretro_vulkan_host_display.h @@ -14,7 +14,8 @@ public: static bool RequestHardwareRendererContext(retro_hw_render_callback* cb); - bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; + bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) override; void DestroyRenderDevice() override; void ResizeRenderWindow(s32 new_window_width, s32 new_window_height) override; diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp index f753049da..d4d007436 100644 --- a/src/duckstation-qt/displaysettingswidget.cpp +++ b/src/duckstation-qt/displaysettingswidget.cpp @@ -34,6 +34,8 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW "IntegerScaling"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.vsync, "Display", "VSync"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, 
m_ui.gpuThread, "GPU", "UseThread", true); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.threadedPresentation, "GPU", + "ThreadedPresentation", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false); @@ -85,6 +87,9 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW m_ui.vsync, tr("VSync"), tr("Checked"), tr("Enable this option to match DuckStation's refresh rate with your current monitor or screen. " "VSync is automatically disabled when it is not possible (e.g. running at non-100% speed).")); + dialog->registerWidgetHelp(m_ui.threadedPresentation, tr("Threaded Presentation"), tr("Checked"), + tr("Presents frames on a background thread when fast forwarding or vsync is disabled. " + "This can measurably improve performance in the Vulkan renderer.")); dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"), tr("Uses a second thread for drawing graphics. Currently only available for the software " "renderer, but can provide a significant speed improvement, and is safe to use.")); @@ -104,7 +109,7 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW { QCheckBox* cb = new QCheckBox(tr("Use Blit Swap Chain"), m_ui.basicGroupBox); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, cb, "Display", "UseBlitSwapChain", false); - m_ui.basicCheckboxGridLayout->addWidget(cb, 1, 0, 1, 1); + m_ui.basicCheckboxGridLayout->addWidget(cb, 1, 1, 1, 1); dialog->registerWidgetHelp(cb, tr("Use Blit Swap Chain"), tr("Unchecked"), tr("Uses a blit presentation model instead of flipping when using the Direct3D 11 " "renderer. 
This usually results in slower performance, but may be required for some " @@ -141,6 +146,7 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() std::vector adapter_names; std::vector fullscreen_modes; bool thread_supported = false; + bool threaded_presentation_supported = false; switch (static_cast(m_ui.renderer->currentIndex())) { #ifdef WIN32 @@ -155,6 +161,7 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() case GPURenderer::HardwareVulkan: adapter_names = FrontendCommon::VulkanHostDisplay::EnumerateAdapterNames(); + threaded_presentation_supported = true; break; case GPURenderer::Software: @@ -207,6 +214,7 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() } m_ui.gpuThread->setEnabled(thread_supported); + m_ui.threadedPresentation->setEnabled(threaded_presentation_supported); } void DisplaySettingsWidget::onGPUAdapterIndexChanged() diff --git a/src/duckstation-qt/displaysettingswidget.ui b/src/duckstation-qt/displaysettingswidget.ui index 02971ce44..0e6435988 100644 --- a/src/duckstation-qt/displaysettingswidget.ui +++ b/src/duckstation-qt/displaysettingswidget.ui @@ -78,6 +78,13 @@ + + + + Threaded Presentation + + + diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index f4fda8dea..fa11e8d9e 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -90,8 +90,7 @@ bool MainWindow::shouldHideCursorInFullscreen() const return g_host_interface->GetBoolSettingValue("Main", "HideCursorInFullscreen", true); } -QtDisplayWidget* MainWindow::createDisplay(QThread* worker_thread, const QString& adapter_name, bool use_debug_device, - bool fullscreen, bool render_to_main) +QtDisplayWidget* MainWindow::createDisplay(QThread* worker_thread, bool fullscreen, bool render_to_main) { Assert(!m_host_display && !m_display_widget); Assert(!fullscreen || !render_to_main); @@ -143,7 +142,8 @@ QtDisplayWidget* MainWindow::createDisplay(QThread* worker_thread, const QString 
return nullptr; } - if (!m_host_display->CreateRenderDevice(wi.value(), adapter_name.toStdString(), use_debug_device)) + if (!m_host_display->CreateRenderDevice(wi.value(), g_settings.gpu_adapter, g_settings.gpu_use_debug_device, + g_settings.gpu_threaded_presentation)) { reportError(tr("Failed to create host display device context.")); destroyDisplayWidget(); @@ -815,11 +815,14 @@ void MainWindow::updateEmulationActions(bool starting, bool running) } } - if (g_settings.debugging.enable_gdb_server) { - if (starting && !m_gdb_server) { + if (g_settings.debugging.enable_gdb_server) + { + if (starting && !m_gdb_server) + { m_gdb_server = new GDBServer(this, g_settings.debugging.gdb_server_port); } - else if (!running && m_gdb_server) { + else if (!running && m_gdb_server) + { delete m_gdb_server; m_gdb_server = nullptr; } diff --git a/src/duckstation-qt/mainwindow.h b/src/duckstation-qt/mainwindow.h index 19ba30612..beceffb01 100644 --- a/src/duckstation-qt/mainwindow.h +++ b/src/duckstation-qt/mainwindow.h @@ -47,8 +47,7 @@ private Q_SLOTS: void reportError(const QString& message); void reportMessage(const QString& message); bool confirmMessage(const QString& message); - QtDisplayWidget* createDisplay(QThread* worker_thread, const QString& adapter_name, bool use_debug_device, - bool fullscreen, bool render_to_main); + QtDisplayWidget* createDisplay(QThread* worker_thread, bool fullscreen, bool render_to_main); QtDisplayWidget* updateDisplay(QThread* worker_thread, bool fullscreen, bool render_to_main); void displaySizeRequested(qint32 width, qint32 height); void destroyDisplay(); diff --git a/src/duckstation-qt/qthostinterface.cpp b/src/duckstation-qt/qthostinterface.cpp index 258b66f74..f5ccdb509 100644 --- a/src/duckstation-qt/qthostinterface.cpp +++ b/src/duckstation-qt/qthostinterface.cpp @@ -488,9 +488,7 @@ bool QtHostInterface::AcquireHostDisplay() m_is_rendering_to_main = m_settings_interface->GetBoolValue("Main", "RenderToMainWindow", true); - 
QtDisplayWidget* display_widget = - createDisplayRequested(m_worker_thread, QString::fromStdString(g_settings.gpu_adapter), - g_settings.gpu_use_debug_device, m_is_fullscreen, m_is_rendering_to_main); + QtDisplayWidget* display_widget = createDisplayRequested(m_worker_thread, m_is_fullscreen, m_is_rendering_to_main); if (!display_widget || !m_display->HasRenderDevice()) { emit destroyDisplayRequested(); @@ -501,7 +499,8 @@ bool QtHostInterface::AcquireHostDisplay() createImGuiContext(display_widget->devicePixelRatioFromScreen()); if (!m_display->MakeRenderContextCurrent() || - !m_display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device) || + !m_display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device, + g_settings.gpu_threaded_presentation) || !CreateHostDisplayResources()) { destroyImGuiContext(); diff --git a/src/duckstation-qt/qthostinterface.h b/src/duckstation-qt/qthostinterface.h index d7ceda635..2cb3e6827 100644 --- a/src/duckstation-qt/qthostinterface.h +++ b/src/duckstation-qt/qthostinterface.h @@ -129,8 +129,7 @@ Q_SIGNALS: void emulationPaused(bool paused); void stateSaved(const QString& game_code, bool global, qint32 slot); void gameListRefreshed(); - QtDisplayWidget* createDisplayRequested(QThread* worker_thread, const QString& adapter_name, bool use_debug_device, - bool fullscreen, bool render_to_main); + QtDisplayWidget* createDisplayRequested(QThread* worker_thread, bool fullscreen, bool render_to_main); QtDisplayWidget* updateDisplayRequested(QThread* worker_thread, bool fullscreen, bool render_to_main); void displaySizeRequested(qint32 width, qint32 height); void focusDisplayWidgetRequested(); diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index 141fa5b23..cce2ad925 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -132,8 +132,10 @@ bool SDLHostInterface::CreateDisplay() } 
Assert(display); - if (!display->CreateRenderDevice(wi.value(), g_settings.gpu_adapter, g_settings.gpu_use_debug_device) || - !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device)) + if (!display->CreateRenderDevice(wi.value(), g_settings.gpu_adapter, g_settings.gpu_use_debug_device, + g_settings.gpu_threaded_presentation) || + !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device, + g_settings.gpu_threaded_presentation)) { ReportError("Failed to create/initialize display render device"); return false; diff --git a/src/frontend-common/d3d11_host_display.cpp b/src/frontend-common/d3d11_host_display.cpp index 4304e7455..86172af72 100644 --- a/src/frontend-common/d3d11_host_display.cpp +++ b/src/frontend-common/d3d11_host_display.cpp @@ -241,7 +241,8 @@ void D3D11HostDisplay::SetVSync(bool enabled) #endif } -bool D3D11HostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) +bool D3D11HostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) { #ifndef LIBRETRO UINT create_flags = 0; @@ -352,7 +353,8 @@ bool D3D11HostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view return true; } -bool D3D11HostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) +bool D3D11HostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) { #ifndef LIBRETRO if (m_window_info.type != WindowInfo::Type::Surfaceless && m_window_info.type != WindowInfo::Type::Libretro && diff --git a/src/frontend-common/d3d11_host_display.h b/src/frontend-common/d3d11_host_display.h index 673018d31..2827413f5 100644 --- a/src/frontend-common/d3d11_host_display.h +++ b/src/frontend-common/d3d11_host_display.h @@ -35,8 +35,8 @@ public: virtual bool HasRenderDevice() const override; virtual bool 
HasRenderSurface() const override; - virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; - virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override; + virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, bool threaded_presentation) override; + virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, bool threaded_presentation) override; virtual void DestroyRenderDevice() override; virtual bool MakeRenderContextCurrent() override; diff --git a/src/frontend-common/opengl_host_display.cpp b/src/frontend-common/opengl_host_display.cpp index 792d50710..6e7cbf9b6 100644 --- a/src/frontend-common/opengl_host_display.cpp +++ b/src/frontend-common/opengl_host_display.cpp @@ -318,7 +318,8 @@ bool OpenGLHostDisplay::HasRenderSurface() const return m_window_info.type != WindowInfo::Type::Surfaceless; } -bool OpenGLHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) +bool OpenGLHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) { m_gl_context = GL::Context::Create(wi); if (!m_gl_context) @@ -333,7 +334,8 @@ bool OpenGLHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_vie return true; } -bool OpenGLHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) +bool OpenGLHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) { glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); diff --git a/src/frontend-common/opengl_host_display.h b/src/frontend-common/opengl_host_display.h index b9b10fc0e..b37b279ab 100644 --- a/src/frontend-common/opengl_host_display.h +++ 
b/src/frontend-common/opengl_host_display.h @@ -35,8 +35,8 @@ public: virtual bool HasRenderDevice() const override; virtual bool HasRenderSurface() const override; - virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; - virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override; + virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, bool threaded_presentation) override; + virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, bool threaded_presentation) override; virtual void DestroyRenderDevice() override; virtual bool MakeRenderContextCurrent() override; diff --git a/src/frontend-common/vulkan_host_display.cpp b/src/frontend-common/vulkan_host_display.cpp index 947b9ad37..a2bf713b6 100644 --- a/src/frontend-common/vulkan_host_display.cpp +++ b/src/frontend-common/vulkan_host_display.cpp @@ -192,6 +192,7 @@ bool VulkanHostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, fl void VulkanHostDisplay::DestroyRenderSurface() { m_window_info = {}; + g_vulkan_context->WaitForGPUIdle(); m_swap_chain.reset(); } @@ -310,9 +311,10 @@ void VulkanHostDisplay::SetVSync(bool enabled) m_swap_chain->SetVSync(enabled); } -bool VulkanHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) +bool VulkanHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) { - if (!Vulkan::Context::Create(adapter_name, &wi, &m_swap_chain, debug_device, false)) + if (!Vulkan::Context::Create(adapter_name, &wi, &m_swap_chain, threaded_presentation, debug_device, false)) { Log_ErrorPrintf("Failed to create Vulkan context"); return false; @@ -328,7 +330,7 @@ bool VulkanHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_vie return true; } -bool 
VulkanHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) +bool VulkanHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, bool threaded_presentation) { Vulkan::ShaderCache::Create(shader_cache_directory, debug_device); @@ -648,7 +650,7 @@ bool VulkanHostDisplay::Render() g_vulkan_context->SubmitCommandBuffer(m_swap_chain->GetImageAvailableSemaphore(), m_swap_chain->GetRenderingFinishedSemaphore(), m_swap_chain->GetSwapChain(), - m_swap_chain->GetCurrentImageIndex()); + m_swap_chain->GetCurrentImageIndex(), !m_swap_chain->IsVSyncEnabled()); g_vulkan_context->MoveToNextCommandBuffer(); #ifdef WITH_IMGUI diff --git a/src/frontend-common/vulkan_host_display.h b/src/frontend-common/vulkan_host_display.h index 31346f242..7cdc57b92 100644 --- a/src/frontend-common/vulkan_host_display.h +++ b/src/frontend-common/vulkan_host_display.h @@ -32,8 +32,10 @@ public: virtual bool HasRenderDevice() const override; virtual bool HasRenderSurface() const override; - virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device) override; - virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override; + virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool threaded_presentation) override; + virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool threaded_presentation) override; virtual void DestroyRenderDevice() override; virtual bool MakeRenderContextCurrent() override;