diff --git a/android/app/src/cpp/android_host_interface.cpp b/android/app/src/cpp/android_host_interface.cpp
index 6fd0796c5..b9a7b5155 100644
--- a/android/app/src/cpp/android_host_interface.cpp
+++ b/android/app/src/cpp/android_host_interface.cpp
@@ -459,8 +459,9 @@ bool AndroidHostInterface::AcquireHostDisplay()
break;
}
- if (!display->CreateRenderDevice(wi, {}, g_settings.gpu_use_debug_device) ||
- !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device))
+ if (!display->CreateRenderDevice(wi, {}, g_settings.gpu_use_debug_device, g_settings.gpu_threaded_presentation) ||
+ !display->InitializeRenderDevice(GetShaderCacheBasePath(), g_settings.gpu_use_debug_device,
+ g_settings.gpu_threaded_presentation))
{
ReportError("Failed to acquire host display.");
display->DestroyRenderDevice();
diff --git a/android/app/src/main/res/values/strings.xml b/android/app/src/main/res/values/strings.xml
index 1f745e58b..17723f2fb 100644
--- a/android/app/src/main/res/values/strings.xml
+++ b/android/app/src/main/res/values/strings.xml
@@ -153,6 +153,8 @@
Copy
Threaded GPU Rendering
Uses a second thread for drawing graphics. Currently only available for the software renderer, but can provide a significant speed improvement, and is safe to use.
+ Threaded GPU Presentation
+ Presents frames on a background thread when fast forwarding or vsync is disabled. This can measurably improve performance in the Vulkan renderer.
Language (restart to apply)
Stop Editing
Reset Layout
diff --git a/android/app/src/main/res/xml/advanced_preferences.xml b/android/app/src/main/res/xml/advanced_preferences.xml
index 034b2b425..ad5fd714b 100644
--- a/android/app/src/main/res/xml/advanced_preferences.xml
+++ b/android/app/src/main/res/xml/advanced_preferences.xml
@@ -49,6 +49,12 @@
app:defaultValue="true"
app:summary="@string/settings_summary_gpu_thread"
app:iconSpaceReserved="false" />
+
* out_swap_chain,
- bool enable_debug_reports, bool enable_validation_layer)
+ bool threaded_presentation, bool enable_debug_reports, bool enable_validation_layer)
{
AssertMsg(!g_vulkan_context, "Has no current context");
@@ -374,6 +376,9 @@ bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::uniqu
return false;
}
+ if (threaded_presentation)
+ g_vulkan_context->StartPresentThread();
+
return true;
}
@@ -810,6 +815,7 @@ void Context::WaitForFenceCounter(u64 fence_counter)
void Context::WaitForGPUIdle()
{
+ WaitForPresentComplete(); // Block until no queued submit/present is pending before idling the device.
vkDeviceWaitIdle(m_device);
}
@@ -843,8 +849,10 @@ void Context::WaitForCommandBufferCompletion(u32 index)
m_completed_fence_counter = now_completed_counter;
}
-void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal_semaphore,
- VkSwapchainKHR present_swap_chain, uint32_t present_image_index)
+void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore /* = VK_NULL_HANDLE */,
+ VkSemaphore signal_semaphore /* = VK_NULL_HANDLE */,
+ VkSwapchainKHR present_swap_chain /* = VK_NULL_HANDLE */,
+ uint32_t present_image_index /* = 0xFFFFFFFF */, bool submit_on_thread /* = false */)
{
FrameResources& resources = m_frame_resources[m_current_frame];
@@ -859,7 +867,30 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;
- // This may be executed on the worker thread, so don't modify any state of the manager class.
+ std::unique_lock lock(m_present_mutex);
+ WaitForPresentComplete(lock);
+
+ if (!submit_on_thread || !m_present_thread.joinable())
+ {
+ DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore);
+ if (present_swap_chain != VK_NULL_HANDLE)
+ DoPresent(signal_semaphore, present_swap_chain, present_image_index);
+ return;
+ }
+
+ m_queued_present.command_buffer_index = m_current_frame;
+ m_queued_present.present_swap_chain = present_swap_chain;
+ m_queued_present.present_image_index = present_image_index;
+ m_queued_present.wait_semaphore = wait_semaphore;
+ m_queued_present.signal_semaphore = signal_semaphore;
+ m_present_done.store(false);
+ m_present_queued_cv.notify_one();
+}
+
+void Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore)
+{
+ FrameResources& resources = m_frame_resources[index];
+
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, &wait_bits, 1u,
&resources.command_buffer, 0, nullptr};
@@ -876,39 +907,93 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore, VkSemaphore signal
submit_info.pSignalSemaphores = &signal_semaphore;
}
- res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
+ VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
Panic("Failed to submit command buffer.");
}
+}
- // Do we have a swap chain to present?
- if (present_swap_chain != VK_NULL_HANDLE)
+void Context::DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index)
+{
+ // Must have a semaphore to wait on (callers pass the submit's signal semaphore).
+ Assert(wait_semaphore != VK_NULL_HANDLE);
+ VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ nullptr,
+ 1,
+ &wait_semaphore,
+ 1,
+ &present_swap_chain,
+ &present_image_index,
+ nullptr};
+
+ VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
+ if (res != VK_SUCCESS)
{
- // Should have a signal semaphore.
- Assert(signal_semaphore != VK_NULL_HANDLE);
- VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
- nullptr,
- 1,
- &signal_semaphore,
- 1,
- &present_swap_chain,
- &present_image_index,
- nullptr};
+ // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
+ if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
+ LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
- res = vkQueuePresentKHR(m_present_queue, &present_info);
- if (res != VK_SUCCESS)
- {
- // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
- if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
- LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
-
- m_last_present_failed = true;
- }
+ m_last_present_failed.store(true); // Set for every non-success result, including the two that are not logged.
}
}
+void Context::WaitForPresentComplete() // Locking overload: acquires m_present_mutex, then defers to the lock-taking overload.
+{
+ std::unique_lock lock(m_present_mutex);
+ WaitForPresentComplete(lock);
+}
+
+void Context::WaitForPresentComplete(std::unique_lock<std::mutex>& lock) // `lock` must already be held by the caller.
+{
+ if (m_present_done.load()) // Fast path: no submit/present is currently queued.
+ return;
+
+ m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); }); // `lock` is released while blocked.
+}
+
+void Context::PresentThread() // Worker loop: submits and presents the single queued frame, then signals completion.
+{
+ std::unique_lock lock(m_present_mutex); // Held throughout, except while blocked inside wait().
+ while (!m_present_thread_done.load())
+ {
+ m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); });
+
+ if (m_present_done.load()) // Nothing queued, so the wake-up was the exit request; the loop condition ends the thread.
+ continue;
+
+ DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.wait_semaphore,
+ m_queued_present.signal_semaphore);
+ DoPresent(m_queued_present.signal_semaphore, m_queued_present.present_swap_chain,
+ m_queued_present.present_image_index); // NOTE(review): unconditional, unlike the inline path's swap-chain check - confirm callers never queue without one.
+ m_present_done.store(true);
+ m_present_done_cv.notify_one(); // Wake a thread blocked in WaitForPresentComplete().
+ }
+}
+
+void Context::StartPresentThread() // Spawns the worker that runs PresentThread(); asserts it is not already running.
+{
+ Assert(!m_present_thread.joinable());
+ m_present_thread_done.store(false); // Clear the exit flag before the new thread can read it.
+ m_present_thread = std::thread(&Context::PresentThread, this);
+}
+
+void Context::StopPresentThread() // Drains any queued present, asks the worker to exit, then joins it.
+{
+ if (!m_present_thread.joinable()) // No running worker: nothing to stop.
+ return;
+
+ {
+ std::unique_lock lock(m_present_mutex);
+ WaitForPresentComplete(lock);
+ m_present_thread_done.store(true);
+ m_present_queued_cv.notify_one();
+ } // Mutex released here so the worker can reacquire it, observe the flag, and return.
+
+ m_present_thread.join();
+}
+
void Context::MoveToNextCommandBuffer()
{
ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
@@ -918,6 +1003,9 @@ void Context::ActivateCommandBuffer(u32 index)
{
FrameResources& resources = m_frame_resources[index];
+ if (!m_present_done.load() && m_queued_present.command_buffer_index == index)
+ WaitForPresentComplete();
+
// Wait for the GPU to finish with all resources for this command buffer.
if (resources.fence_counter > m_completed_fence_counter)
WaitForCommandBufferCompletion(index);
diff --git a/src/common/vulkan/context.h b/src/common/vulkan/context.h
index 6fec69044..48b3a76c3 100644
--- a/src/common/vulkan/context.h
+++ b/src/common/vulkan/context.h
@@ -8,10 +8,14 @@
#include "../types.h"
#include "vulkan_loader.h"
#include
+#include
+#include
#include
#include