diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp
index da7f89b1be..0f78cd158a 100644
--- a/Source/Core/Core/CoreTiming.cpp
+++ b/Source/Core/Core/CoreTiming.cpp
@@ -26,6 +26,9 @@
 #include "VideoCommon/VideoBackendBase.h"
 #include "VideoCommon/VideoConfig.h"
 
+// We should factor out video backpressure into a shared_ptr later.
+#include "VideoCommon/PendingTimeOffset.h"
+
 namespace CoreTiming
 {
 // Sort by time, unless the times are the same, in which case sort by the order added to the queue
@@ -346,6 +349,8 @@ void CoreTimingManager::Advance()
 
 void CoreTimingManager::Throttle(const s64 target_cycle)
 {
+  using namespace std::chrono;
+
   // Based on number of cycles and emulation speed, increase the target deadline
   const s64 cycles = target_cycle - m_throttle_last_cycle;
 
@@ -358,9 +363,39 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
   const double speed =
       Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
 
-  if (0.0 < speed)
-    m_throttle_deadline +=
-        std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
+  DT applied_offset = DT::zero();
+
+  if (0.0 < speed) {
+    ASSERT(cycles >= 0);
+
+    const DT sleep_dur = duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
+
+    // Snapshot the offset published by the present-wait thread.
+    DT pending_offset;
+    {
+      std::unique_lock lk{s_pending_time_offset.Lock};
+      pending_offset = s_pending_time_offset.Offset;
+    }
+    if (pending_offset == DT::zero()) {
+      m_throttle_deadline += sleep_dur;
+    } else {
+      // The clocks used to measure vkWaitForPresentKHR() are imprecise and noisy.
+      // We want to exponentially approach vsync at a velocity proportional to
+      // remaining distance, with a time constant equal to 100ms.
+      double velocity = duration_cast<DT_us>(pending_offset) / DT_us(100'000);
+
+      // Limit velocity to at most 0.5ms per (frame≈16ms).
+      constexpr double MAX_VELOCITY = 1. / 32.;
+      velocity = std::clamp(velocity, -MAX_VELOCITY, MAX_VELOCITY);
+
+      constexpr DT MAX_FRAME_DUR = duration_cast<DT>(DT_ms(16));
+
+      // d=vt. Clamp t <= 1 frame to avoid unusual cases.
+      const DT_us distance = velocity * std::min(sleep_dur, MAX_FRAME_DUR);
+      applied_offset = duration_cast<DT>(distance);
+      m_throttle_deadline += sleep_dur + applied_offset;
+    }
+  }
 
   // A maximum fallback is used to prevent the system from sleeping for
   // too long or going full speed in an attempt to catch up to timings.
@@ -398,6 +433,12 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
     const TimePoint time_after_sleep = Clock::now();
     g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
   }
+
+  // Consume the part of the pending offset that was folded into the deadline above.
+  if (applied_offset != DT::zero()) {
+    std::unique_lock lk{s_pending_time_offset.Lock};
+    s_pending_time_offset.Offset -= applied_offset;
+  }
 }
 
 void CoreTimingManager::ResetThrottle(s64 cycle)
diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
index 99a7715eb5..a26ad92ca7 100644
--- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
+++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
@@ -16,9 +16,13 @@
 
 namespace Vulkan
 {
-CommandBufferManager::CommandBufferManager(bool use_threaded_submission)
-    : m_use_threaded_submission(use_threaded_submission)
+
+CommandBufferManager::CommandBufferManager(bool use_threaded_submission, bool vsync)
+    : m_vsync(vsync)
+    , m_use_threaded_submission(use_threaded_submission)
 {
+  std::unique_lock lk{s_pending_time_offset.Lock};
+  s_pending_time_offset.Offset = DT::zero();
 }
 
 CommandBufferManager::~CommandBufferManager()
@@ -223,7 +227,7 @@ VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayou
 bool CommandBufferManager::CreateSubmitThread()
 {
   m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
-    SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
+    SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.vsync,
                         submit.present_image_index, submit.frame_id);
     CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
     resources.waiting_for_submit.store(false, std::memory_order_release);
@@ -329,14 +333,14 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
   {
     resources.waiting_for_submit.store(true, std::memory_order_relaxed);
     // Push to the pending submit queue.
-    m_submit_thread.Push({present_swap_chain, present_image_index, m_current_cmd_buffer, frame_id});
+    m_submit_thread.Push({present_swap_chain, m_vsync, present_image_index, m_current_cmd_buffer, frame_id});
   }
   else
   {
     WaitForWorkerThreadIdle();
 
     // Pass through to normal submission path.
-    SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index, frame_id);
+    SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, m_vsync, present_image_index, frame_id);
     if (wait_for_completion)
       WaitForCommandBufferCompletion(m_current_cmd_buffer);
   }
@@ -389,6 +393,7 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
 
 void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
                                                VkSwapchainKHR present_swap_chain,
+                                               bool vsync,
                                                u32 present_image_index, u64 frame_id)
 {
   CmdBufferResources& resources = m_command_buffers[command_buffer_index];
@@ -451,7 +456,7 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
     if (g_vulkan_context->SupportsPresentWait())
     {
       present_info.pNext = &present_id;
-      PresentQueued(frame_id, present_swap_chain);
+      PresentQueued(frame_id, present_swap_chain, vsync);
     }
 
     m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);
diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h
index 243c92c2f6..ff4532140f 100644
--- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h
+++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h
@@ -14,23 +14,28 @@
 #include
 #include
+#include
 #include
 
 #include "Common/BlockingLoop.h"
 #include "Common/Flag.h"
 #include "Common/Semaphore.h"
+#include "VideoCommon/PendingTimeOffset.h"
 
 #include "VideoBackends/Vulkan/Constants.h"
 
 namespace Vulkan
 {
+
 class CommandBufferManager
 {
 public:
-  explicit CommandBufferManager(bool use_threaded_submission);
+  explicit CommandBufferManager(bool use_threaded_submission, bool vsync);
   ~CommandBufferManager();
 
   bool Initialize();
 
+  void SetVSync(bool vsync) { m_vsync = vsync; }
+
   // These command buffers are allocated per-frame. They are valid until the command buffer
   // is submitted, after that you should call these functions again.
   VkCommandBuffer GetCurrentInitCommandBuffer()
@@ -102,6 +107,7 @@ private:
 
   void WaitForCommandBufferCompletion(u32 command_buffer_index);
   void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
+                           bool vsync,
                            u32 present_image_index, u64 frame_id);
   void BeginCommandBuffer();
 
@@ -138,6 +144,7 @@ private:
     return m_command_buffers[m_current_cmd_buffer];
   }
 
+  bool m_vsync;
   u64 m_next_fence_counter = 1;
   u64 m_completed_fence_counter = 0;
 
@@ -150,6 +157,7 @@ private:
   struct PendingCommandBufferSubmit
   {
     VkSwapchainKHR present_swap_chain;
+    bool vsync;
     u32 present_image_index;
     u32 command_buffer_index;
     u64 frame_id;
diff --git a/Source/Core/VideoBackends/Vulkan/PresentWait.cpp b/Source/Core/VideoBackends/Vulkan/PresentWait.cpp
index 1e7d63d06b..85b329380f 100644
--- a/Source/Core/VideoBackends/Vulkan/PresentWait.cpp
+++ b/Source/Core/VideoBackends/Vulkan/PresentWait.cpp
@@ -8,6 +8,7 @@
 #include
 
 #include "Common/WorkQueueThread.h"
+#include "VideoCommon/PendingTimeOffset.h"
 
 #include "VideoBackends/Vulkan/VulkanContext.h"
 #include "VideoBackends/Vulkan/VulkanLoader.h"
@@ -22,17 +23,25 @@ struct Wait
 {
   u64 present_id;
   VkSwapchainKHR swapchain;
+  bool vsync;
 };
 
 static Common::WorkQueueThread<Wait> s_present_wait_thread;
 
-void WaitFunction(Wait wait)
+static void WaitFunction(Wait wait)
 {
+  using namespace std::chrono;
+
   do
   {
+    const TimePoint begin = Clock::now();
+
     // We choose a timeout of 20ms so can poll for IsFlushing
     VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
 
+    const TimePoint end = Clock::now();
+    DT block_dur = end - begin;
+
     if (res == VK_TIMEOUT)
     {
       WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
@@ -47,6 +56,22 @@ void WaitFunction(Wait wait)
     if (res == VK_SUCCESS)
       g_perf_metrics.CountPresent();
 
+    if (wait.vsync)
+    {
+      // Cap at twice the target so the stored offset cannot exceed +TARGET_VSYNC_BLOCK_US.
+      block_dur = std::min(block_dur, duration_cast<DT>(DT_us(2 * TARGET_VSYNC_BLOCK_US)));
+
+      std::unique_lock lk{s_pending_time_offset.Lock};
+
+      static_assert(DT::min() < DT::zero(), "DT must be signed to hold a negative offset");
+      s_pending_time_offset.Offset =
+          block_dur - duration_cast<DT>(DT_us(TARGET_VSYNC_BLOCK_US));
+    } else {
+      // Vsync is off: publish no correction.
+      std::unique_lock lk{s_pending_time_offset.Lock};
+      s_pending_time_offset.Offset = DT::zero();
+    }
+
     return;
   } while (!s_present_wait_thread.IsCancelling());
 }
@@ -62,9 +87,9 @@ void StopPresentWaitThread()
   s_present_wait_thread.Shutdown();
 }
 
-void PresentQueued(u64 present_id, VkSwapchainKHR swapchain)
+void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync)
 {
-  s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain});
+  s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain, vsync});
 }
 
 void FlushPresentWaitQueue()
diff --git a/Source/Core/VideoBackends/Vulkan/PresentWait.h b/Source/Core/VideoBackends/Vulkan/PresentWait.h
index 8a650f8838..195773dc76 100644
--- a/Source/Core/VideoBackends/Vulkan/PresentWait.h
+++ b/Source/Core/VideoBackends/Vulkan/PresentWait.h
@@ -12,7 +12,7 @@ namespace Vulkan
 void StartPresentWaitThread();
 void StopPresentWaitThread();
 
-void PresentQueued(u64 present_id, VkSwapchainKHR swapchain);
+void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync);
 void FlushPresentWaitQueue();
 
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/VKGfx.cpp b/Source/Core/VideoBackends/Vulkan/VKGfx.cpp
index fdfeb5713b..0a3f3aedc7 100644
--- a/Source/Core/VideoBackends/Vulkan/VKGfx.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VKGfx.cpp
@@ -395,6 +395,7 @@ void VKGfx::OnConfigChanged(u32 bits)
   if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
   {
     ExecuteCommandBuffer(false, true);
+    g_command_buffer_mgr->SetVSync(g_ActiveConfig.bVSyncActive);
     m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
   }
 
diff --git a/Source/Core/VideoBackends/Vulkan/VKMain.cpp b/Source/Core/VideoBackends/Vulkan/VKMain.cpp
index 77c6a29374..34abf61b09 100644
--- a/Source/Core/VideoBackends/Vulkan/VKMain.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VKMain.cpp
@@ -198,7 +198,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
   UpdateActiveConfig();
 
   // Create command buffers. We do this separately because the other classes depend on it.
-  g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading);
+  g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading, g_ActiveConfig.bVSyncActive);
   if (!g_command_buffer_mgr->Initialize())
   {
     PanicAlertFmt("Failed to create Vulkan command buffers");
diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt
index 6f12cb80ba..b37688623f 100644
--- a/Source/Core/VideoCommon/CMakeLists.txt
+++ b/Source/Core/VideoCommon/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(videocommon
   OnScreenUIKeyMap.h
   OpcodeDecoding.cpp
   OpcodeDecoding.h
+  PendingTimeOffset.cpp
+  PendingTimeOffset.h
   PerfQueryBase.cpp
   PerfQueryBase.h
   PerformanceMetrics.cpp
diff --git a/Source/Core/VideoCommon/PendingTimeOffset.cpp b/Source/Core/VideoCommon/PendingTimeOffset.cpp
new file mode 100644
index 0000000000..e7eab96909
--- /dev/null
+++ b/Source/Core/VideoCommon/PendingTimeOffset.cpp
@@ -0,0 +1,4 @@
+#include "VideoCommon/PendingTimeOffset.h"
+
+// Single shared instance; declared extern in PendingTimeOffset.h.
+PendingTimeOffset s_pending_time_offset;
diff --git a/Source/Core/VideoCommon/PendingTimeOffset.h b/Source/Core/VideoCommon/PendingTimeOffset.h
new file mode 100644
index 0000000000..0e43880274
--- /dev/null
+++ b/Source/Core/VideoCommon/PendingTimeOffset.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <mutex>
+
+#include "Common/CommonTypes.h"
+
+// Target time, in microseconds, that the Vulkan present-wait thread should spend blocked in
+// vkWaitForPresentKHR() per frame while vsync is active.
+constexpr u64 TARGET_VSYNC_BLOCK_US = 2'000;
+
+// Timing correction handed from the Vulkan present-wait thread to CoreTimingManager::Throttle().
+// The present-wait thread stores (measured block time - target block time) in Offset;
+// Throttle() folds part of it into its deadline and subtracts the part it applied.
+struct PendingTimeOffset
+{
+  // Guards Offset. Hold it for every read or write.
+  std::mutex Lock;
+  // Outstanding correction; zero means nothing is pending.
+  DT Offset = DT::zero();
+};
+
+extern PendingTimeOffset s_pending_time_offset;