Add closed-loop latency control for Vulkan vsync

Depends on VK_KHR_present_wait.
This commit is contained in:
nyanpasu64 2023-07-05 19:00:07 -07:00
parent d7e2edeaa2
commit 73b5fa6810
10 changed files with 112 additions and 15 deletions

View File

@ -26,6 +26,9 @@
#include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
// We should factor out video backpressure into a shared_ptr<PendingTimeOffset> later.
#include "VideoCommon/PendingTimeOffset.h"
namespace CoreTiming namespace CoreTiming
{ {
// Sort by time, unless the times are the same, in which case sort by the order added to the queue // Sort by time, unless the times are the same, in which case sort by the order added to the queue
@ -346,6 +349,8 @@ void CoreTimingManager::Advance()
void CoreTimingManager::Throttle(const s64 target_cycle) void CoreTimingManager::Throttle(const s64 target_cycle)
{ {
using namespace std::chrono;
// Based on number of cycles and emulation speed, increase the target deadline // Based on number of cycles and emulation speed, increase the target deadline
const s64 cycles = target_cycle - m_throttle_last_cycle; const s64 cycles = target_cycle - m_throttle_last_cycle;
@ -358,9 +363,38 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
const double speed = const double speed =
Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED); Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
if (0.0 < speed) DT applied_offset = DT::zero();
m_throttle_deadline +=
std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec)); if (0.0 < speed) {
ASSERT(cycles >= 0);
const DT sleep_dur = std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
DT pending_offset;
{
std::unique_lock lk{s_pending_time_offset.Lock};
pending_offset = s_pending_time_offset.Offset__;
}
if (pending_offset == DT::zero()) {
m_throttle_deadline += sleep_dur;
} else {
// The clocks used to measure vkWaitForPresentKHR() are imprecise and noisy.
// We want to exponentially approach vsync at a velocity proportional to
// remaining distance, with a time constant equal to 100ms.
double velocity = duration_cast<DT_us>(pending_offset) / DT_us(100'000);
// Limit velocity to at most 0.5ms per (frame≈16ms).
constexpr double MAX_VELOCITY = 1. / 32.;
velocity = std::clamp(velocity, -MAX_VELOCITY, MAX_VELOCITY);
constexpr DT MAX_FRAME_DUR = duration_cast<DT>(DT_ms(16));
// d=vt. Clamp t <= 1 frame to avoid unusual cases.
const DT_us distance = velocity * std::min(sleep_dur, MAX_FRAME_DUR);
applied_offset = duration_cast<DT>(distance);
m_throttle_deadline += sleep_dur + applied_offset;
}
}
// A maximum fallback is used to prevent the system from sleeping for // A maximum fallback is used to prevent the system from sleeping for
// too long or going full speed in an attempt to catch up to timings. // too long or going full speed in an attempt to catch up to timings.
@ -398,6 +432,11 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
const TimePoint time_after_sleep = Clock::now(); const TimePoint time_after_sleep = Clock::now();
g_perf_metrics.CountThrottleSleep(time_after_sleep - time); g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
} }
if (applied_offset != DT::zero()) {
std::unique_lock lk{s_pending_time_offset.Lock};
s_pending_time_offset.Offset__ -= applied_offset;
}
} }
void CoreTimingManager::ResetThrottle(s64 cycle) void CoreTimingManager::ResetThrottle(s64 cycle)

View File

@ -16,9 +16,13 @@
namespace Vulkan namespace Vulkan
{ {
// Constructs the manager, storing the threaded-submission setting and (in the new
// signature) the initial vsync flag. The body takes the lock of the global
// s_pending_time_offset and sets its offset to zero, discarding whatever value was
// stored there before this manager was created.
CommandBufferManager::CommandBufferManager(bool use_threaded_submission)
: m_use_threaded_submission(use_threaded_submission) CommandBufferManager::CommandBufferManager(bool use_threaded_submission, bool vsync)
: m_vsync(vsync)
, m_use_threaded_submission(use_threaded_submission)
{ {
std::unique_lock lk{s_pending_time_offset.Lock};
s_pending_time_offset.Offset__ = DT::zero();
} }
CommandBufferManager::~CommandBufferManager() CommandBufferManager::~CommandBufferManager()
@ -223,7 +227,7 @@ VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayou
bool CommandBufferManager::CreateSubmitThread() bool CommandBufferManager::CreateSubmitThread()
{ {
m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) { m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.vsync,
submit.present_image_index, submit.frame_id); submit.present_image_index, submit.frame_id);
CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index]; CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
resources.waiting_for_submit.store(false, std::memory_order_release); resources.waiting_for_submit.store(false, std::memory_order_release);
@ -329,14 +333,14 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
{ {
resources.waiting_for_submit.store(true, std::memory_order_relaxed); resources.waiting_for_submit.store(true, std::memory_order_relaxed);
// Push to the pending submit queue. // Push to the pending submit queue.
m_submit_thread.Push({present_swap_chain, present_image_index, m_current_cmd_buffer, frame_id}); m_submit_thread.Push({present_swap_chain, m_vsync, present_image_index, m_current_cmd_buffer, frame_id});
} }
else else
{ {
WaitForWorkerThreadIdle(); WaitForWorkerThreadIdle();
// Pass through to normal submission path. // Pass through to normal submission path.
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index, frame_id); SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, m_vsync, present_image_index, frame_id);
if (wait_for_completion) if (wait_for_completion)
WaitForCommandBufferCompletion(m_current_cmd_buffer); WaitForCommandBufferCompletion(m_current_cmd_buffer);
} }
@ -389,6 +393,7 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index, void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
VkSwapchainKHR present_swap_chain, VkSwapchainKHR present_swap_chain,
bool vsync,
u32 present_image_index, u64 frame_id) u32 present_image_index, u64 frame_id)
{ {
CmdBufferResources& resources = m_command_buffers[command_buffer_index]; CmdBufferResources& resources = m_command_buffers[command_buffer_index];
@ -451,7 +456,7 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
if (g_vulkan_context->SupportsPresentWait()) if (g_vulkan_context->SupportsPresentWait())
{ {
present_info.pNext = &present_id; present_info.pNext = &present_id;
PresentQueued(frame_id, present_swap_chain); PresentQueued(frame_id, present_swap_chain, vsync);
} }
m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info); m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);

View File

@ -14,23 +14,28 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <Common/CommonTypes.h>
#include <Common/WorkQueueThread.h> #include <Common/WorkQueueThread.h>
#include "Common/BlockingLoop.h" #include "Common/BlockingLoop.h"
#include "Common/Flag.h" #include "Common/Flag.h"
#include "Common/Semaphore.h" #include "Common/Semaphore.h"
#include "VideoCommon/PendingTimeOffset.h"
#include "VideoBackends/Vulkan/Constants.h" #include "VideoBackends/Vulkan/Constants.h"
namespace Vulkan namespace Vulkan
{ {
class CommandBufferManager class CommandBufferManager
{ {
public: public:
explicit CommandBufferManager(bool use_threaded_submission); explicit CommandBufferManager(bool use_threaded_submission, bool vsync);
~CommandBufferManager(); ~CommandBufferManager();
bool Initialize(); bool Initialize();
// Replaces the stored vsync flag. The flag is read when a command buffer is submitted
// and is passed along with that submission, so only later submissions see the new value.
void SetVSync(bool vsync) { m_vsync = vsync; }
// These command buffers are allocated per-frame. They are valid until the command buffer // These command buffers are allocated per-frame. They are valid until the command buffer
// is submitted, after that you should call these functions again. // is submitted, after that you should call these functions again.
VkCommandBuffer GetCurrentInitCommandBuffer() VkCommandBuffer GetCurrentInitCommandBuffer()
@ -102,6 +107,7 @@ private:
void WaitForCommandBufferCompletion(u32 command_buffer_index); void WaitForCommandBufferCompletion(u32 command_buffer_index);
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
bool vsync,
u32 present_image_index, u64 frame_id); u32 present_image_index, u64 frame_id);
void BeginCommandBuffer(); void BeginCommandBuffer();
@ -138,6 +144,7 @@ private:
return m_command_buffers[m_current_cmd_buffer]; return m_command_buffers[m_current_cmd_buffer];
} }
bool m_vsync;
u64 m_next_fence_counter = 1; u64 m_next_fence_counter = 1;
u64 m_completed_fence_counter = 0; u64 m_completed_fence_counter = 0;
@ -150,6 +157,7 @@ private:
struct PendingCommandBufferSubmit struct PendingCommandBufferSubmit
{ {
VkSwapchainKHR present_swap_chain; VkSwapchainKHR present_swap_chain;
bool vsync;
u32 present_image_index; u32 present_image_index;
u32 command_buffer_index; u32 command_buffer_index;
u64 frame_id; u64 frame_id;

View File

@ -8,6 +8,7 @@
#include <tuple> #include <tuple>
#include "Common/WorkQueueThread.h" #include "Common/WorkQueueThread.h"
#include "VideoCommon/PendingTimeOffset.h"
#include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoBackends/Vulkan/VulkanContext.h"
#include "VideoBackends/Vulkan/VulkanLoader.h" #include "VideoBackends/Vulkan/VulkanLoader.h"
@ -22,17 +23,25 @@ struct Wait
{ {
u64 present_id; u64 present_id;
VkSwapchainKHR swapchain; VkSwapchainKHR swapchain;
bool vsync;
}; };
static Common::WorkQueueThread<Wait> s_present_wait_thread; static Common::WorkQueueThread<Wait> s_present_wait_thread;
void WaitFunction(Wait wait) static void WaitFunction(Wait wait)
{ {
using namespace std::chrono;
do do
{ {
const TimePoint begin = Clock::now();
// We choose a timeout of 20ms so can poll for IsFlushing // We choose a timeout of 20ms so can poll for IsFlushing
VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000); VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
const TimePoint end = Clock::now();
DT block_dur = end - begin;
if (res == VK_TIMEOUT) if (res == VK_TIMEOUT)
{ {
WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id); WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
@ -47,6 +56,20 @@ void WaitFunction(Wait wait)
if (res == VK_SUCCESS) if (res == VK_SUCCESS)
g_perf_metrics.CountPresent(); g_perf_metrics.CountPresent();
if (wait.vsync)
{
block_dur = std::min(block_dur, duration_cast<DT>(DT_us(2 * TARGET_VSYNC_BLOCK_US)));
std::unique_lock lk{s_pending_time_offset.Lock};
// pray DT is signed.
s_pending_time_offset.Offset__ =
block_dur - duration_cast<DT>(DT_us(TARGET_VSYNC_BLOCK_US));
} else {
std::unique_lock lk{s_pending_time_offset.Lock};
s_pending_time_offset.Offset__ = DT::zero();
}
return; return;
} while (!s_present_wait_thread.IsCancelling()); } while (!s_present_wait_thread.IsCancelling());
} }
@ -62,9 +85,9 @@ void StopPresentWaitThread()
s_present_wait_thread.Shutdown(); s_present_wait_thread.Shutdown();
} }
// Enqueues a Wait item on s_present_wait_thread for a present that is about to be queued.
// The item carries the present id, the swapchain it belongs to and (in the new signature)
// the vsync flag that was in effect for that present.
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain) void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync)
{ {
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain}); s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain, vsync});
} }
void FlushPresentWaitQueue() void FlushPresentWaitQueue()

View File

@ -12,7 +12,7 @@ namespace Vulkan
void StartPresentWaitThread(); void StartPresentWaitThread();
void StopPresentWaitThread(); void StopPresentWaitThread();
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain); void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync);
void FlushPresentWaitQueue(); void FlushPresentWaitQueue();
} // namespace Vulkan } // namespace Vulkan

View File

@ -395,6 +395,7 @@ void VKGfx::OnConfigChanged(u32 bits)
if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC)) if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
{ {
ExecuteCommandBuffer(false, true); ExecuteCommandBuffer(false, true);
g_command_buffer_mgr->SetVSync(g_ActiveConfig.bVSyncActive);
m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive); m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
} }

View File

@ -198,7 +198,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
UpdateActiveConfig(); UpdateActiveConfig();
// Create command buffers. We do this separately because the other classes depend on it. // Create command buffers. We do this separately because the other classes depend on it.
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading); g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading, g_ActiveConfig.bVSyncActive);
if (!g_command_buffer_mgr->Initialize()) if (!g_command_buffer_mgr->Initialize())
{ {
PanicAlertFmt("Failed to create Vulkan command buffers"); PanicAlertFmt("Failed to create Vulkan command buffers");

View File

@ -106,6 +106,8 @@ add_library(videocommon
OnScreenUIKeyMap.h OnScreenUIKeyMap.h
OpcodeDecoding.cpp OpcodeDecoding.cpp
OpcodeDecoding.h OpcodeDecoding.h
PendingTimeOffset.cpp
PendingTimeOffset.h
PerfQueryBase.cpp PerfQueryBase.cpp
PerfQueryBase.h PerfQueryBase.h
PerformanceMetrics.cpp PerformanceMetrics.cpp

View File

@ -0,0 +1,4 @@
#include "PendingTimeOffset.h"
// The one definition of the global that PendingTimeOffset.h declares `extern`.
PendingTimeOffset s_pending_time_offset;

View File

@ -0,0 +1,15 @@
#pragma once
#include <mutex>
#include "Common/CommonTypes.h"
// Target time, in microseconds, for vkWaitForPresentKHR() to block while vsync is on.
// The Vulkan present-wait code clamps the measured block time to twice this value and
// stores (measured - target) as the pending time offset.
constexpr u64 TARGET_VSYNC_BLOCK_US = 2'000;
struct PendingTimeOffset
{
std::mutex Lock;
DT Offset__;
};
// Global instance, defined in PendingTimeOffset.cpp. It is written by the Vulkan
// present-wait code, reset by the Vulkan CommandBufferManager constructor, and consumed
// by CoreTimingManager::Throttle(). Every access must hold its Lock.
extern PendingTimeOffset s_pending_time_offset;