Add closed-loop latency control for Vulkan vsync

Depends on VK_KHR_present_wait.
This commit is contained in:
nyanpasu64 2023-07-05 19:00:07 -07:00
parent d7e2edeaa2
commit 73b5fa6810
10 changed files with 112 additions and 15 deletions

View File

@ -26,6 +26,9 @@
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"
// TODO: Factor video backpressure out into a shared_ptr<PendingTimeOffset> instead of a global.
#include "VideoCommon/PendingTimeOffset.h"
namespace CoreTiming
{
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
@ -346,6 +349,8 @@ void CoreTimingManager::Advance()
void CoreTimingManager::Throttle(const s64 target_cycle)
{
using namespace std::chrono;
// Based on number of cycles and emulation speed, increase the target deadline
const s64 cycles = target_cycle - m_throttle_last_cycle;
@ -358,9 +363,38 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
const double speed =
Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
if (0.0 < speed)
m_throttle_deadline +=
std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
DT applied_offset = DT::zero();
if (0.0 < speed) {
ASSERT(cycles >= 0);
const DT sleep_dur = std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
DT pending_offset;
{
std::unique_lock lk{s_pending_time_offset.Lock};
pending_offset = s_pending_time_offset.Offset__;
}
if (pending_offset == DT::zero()) {
m_throttle_deadline += sleep_dur;
} else {
// The clocks used to measure vkWaitForPresentKHR() are imprecise and noisy.
// We want to exponentially approach vsync at a velocity proportional to
// remaining distance, with a time constant equal to 100ms.
double velocity = duration_cast<DT_us>(pending_offset) / DT_us(100'000);
// Limit velocity to at most 0.5ms per (frame≈16ms).
constexpr double MAX_VELOCITY = 1. / 32.;
velocity = std::clamp(velocity, -MAX_VELOCITY, MAX_VELOCITY);
constexpr DT MAX_FRAME_DUR = duration_cast<DT>(DT_ms(16));
// d=vt. Clamp t <= 1 frame to avoid unusual cases.
const DT_us distance = velocity * std::min(sleep_dur, MAX_FRAME_DUR);
applied_offset = duration_cast<DT>(distance);
m_throttle_deadline += sleep_dur + applied_offset;
}
}
// A maximum fallback is used to prevent the system from sleeping for
// too long or going full speed in an attempt to catch up to timings.
@ -398,6 +432,11 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
const TimePoint time_after_sleep = Clock::now();
g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
}
if (applied_offset != DT::zero()) {
std::unique_lock lk{s_pending_time_offset.Lock};
s_pending_time_offset.Offset__ -= applied_offset;
}
}
void CoreTimingManager::ResetThrottle(s64 cycle)

View File

@ -16,9 +16,13 @@
namespace Vulkan
{
CommandBufferManager::CommandBufferManager(bool use_threaded_submission)
: m_use_threaded_submission(use_threaded_submission)
// Constructs the manager, storing the threaded-submission flag and the initial vsync state.
CommandBufferManager::CommandBufferManager(bool use_threaded_submission, bool vsync)
    : m_vsync(vsync), m_use_threaded_submission(use_threaded_submission)
{
  // Clear the shared pending time offset while holding its mutex, so a newly created
  // manager always starts from a zero offset.
  std::lock_guard guard{s_pending_time_offset.Lock};
  s_pending_time_offset.Offset__ = DT::zero();
}
CommandBufferManager::~CommandBufferManager()
@ -223,7 +227,7 @@ VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayou
bool CommandBufferManager::CreateSubmitThread()
{
m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.vsync,
submit.present_image_index, submit.frame_id);
CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
resources.waiting_for_submit.store(false, std::memory_order_release);
@ -329,14 +333,14 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
{
resources.waiting_for_submit.store(true, std::memory_order_relaxed);
// Push to the pending submit queue.
m_submit_thread.Push({present_swap_chain, present_image_index, m_current_cmd_buffer, frame_id});
m_submit_thread.Push({present_swap_chain, m_vsync, present_image_index, m_current_cmd_buffer, frame_id});
}
else
{
WaitForWorkerThreadIdle();
// Pass through to normal submission path.
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index, frame_id);
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, m_vsync, present_image_index, frame_id);
if (wait_for_completion)
WaitForCommandBufferCompletion(m_current_cmd_buffer);
}
@ -389,6 +393,7 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
VkSwapchainKHR present_swap_chain,
bool vsync,
u32 present_image_index, u64 frame_id)
{
CmdBufferResources& resources = m_command_buffers[command_buffer_index];
@ -451,7 +456,7 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
if (g_vulkan_context->SupportsPresentWait())
{
present_info.pNext = &present_id;
PresentQueued(frame_id, present_swap_chain);
PresentQueued(frame_id, present_swap_chain, vsync);
}
m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);

View File

@ -14,23 +14,28 @@
#include <utility>
#include <vector>
#include <Common/CommonTypes.h>
#include <Common/WorkQueueThread.h>
#include "Common/BlockingLoop.h"
#include "Common/Flag.h"
#include "Common/Semaphore.h"
#include "VideoCommon/PendingTimeOffset.h"
#include "VideoBackends/Vulkan/Constants.h"
namespace Vulkan
{
class CommandBufferManager
{
public:
explicit CommandBufferManager(bool use_threaded_submission);
explicit CommandBufferManager(bool use_threaded_submission, bool vsync);
~CommandBufferManager();
bool Initialize();
void SetVSync(bool vsync) { m_vsync = vsync; }
// These command buffers are allocated per-frame. They are valid until the command buffer
// is submitted, after that you should call these functions again.
VkCommandBuffer GetCurrentInitCommandBuffer()
@ -102,6 +107,7 @@ private:
void WaitForCommandBufferCompletion(u32 command_buffer_index);
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
bool vsync,
u32 present_image_index, u64 frame_id);
void BeginCommandBuffer();
@ -138,6 +144,7 @@ private:
return m_command_buffers[m_current_cmd_buffer];
}
bool m_vsync;
u64 m_next_fence_counter = 1;
u64 m_completed_fence_counter = 0;
@ -150,6 +157,7 @@ private:
struct PendingCommandBufferSubmit
{
VkSwapchainKHR present_swap_chain;
bool vsync;
u32 present_image_index;
u32 command_buffer_index;
u64 frame_id;

View File

@ -8,6 +8,7 @@
#include <tuple>
#include "Common/WorkQueueThread.h"
#include "VideoCommon/PendingTimeOffset.h"
#include "VideoBackends/Vulkan/VulkanContext.h"
#include "VideoBackends/Vulkan/VulkanLoader.h"
@ -22,17 +23,25 @@ struct Wait
{
u64 present_id;
VkSwapchainKHR swapchain;
bool vsync;
};
static Common::WorkQueueThread<Wait> s_present_wait_thread;
void WaitFunction(Wait wait)
static void WaitFunction(Wait wait)
{
using namespace std::chrono;
do
{
const TimePoint begin = Clock::now();
// We choose a timeout of 20ms so that we can poll for IsFlushing
VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
const TimePoint end = Clock::now();
DT block_dur = end - begin;
if (res == VK_TIMEOUT)
{
WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
@ -47,6 +56,20 @@ void WaitFunction(Wait wait)
if (res == VK_SUCCESS)
g_perf_metrics.CountPresent();
if (wait.vsync)
{
block_dur = std::min(block_dur, duration_cast<DT>(DT_us(2 * TARGET_VSYNC_BLOCK_US)));
std::unique_lock lk{s_pending_time_offset.Lock};
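// NOTE: DT must have a signed representation; this difference is negative whenever the
// wait was shorter than the target.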
s_pending_time_offset.Offset__ =
block_dur - duration_cast<DT>(DT_us(TARGET_VSYNC_BLOCK_US));
} else {
std::unique_lock lk{s_pending_time_offset.Lock};
s_pending_time_offset.Offset__ = DT::zero();
}
return;
} while (!s_present_wait_thread.IsCancelling());
}
@ -62,9 +85,9 @@ void StopPresentWaitThread()
s_present_wait_thread.Shutdown();
}
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain)
// Enqueues a Wait item for this present on s_present_wait_thread.
// The vsync flag is stored in the Wait item; WaitFunction reads it to decide whether to
// publish a pending time offset from the measured wait or to reset that offset to zero.
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync)
{
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain});
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain, vsync});
}
void FlushPresentWaitQueue()

View File

@ -12,7 +12,7 @@ namespace Vulkan
void StartPresentWaitThread();
void StopPresentWaitThread();
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain);
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync);
void FlushPresentWaitQueue();
} // namespace Vulkan

View File

@ -395,6 +395,7 @@ void VKGfx::OnConfigChanged(u32 bits)
if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
{
ExecuteCommandBuffer(false, true);
g_command_buffer_mgr->SetVSync(g_ActiveConfig.bVSyncActive);
m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
}

View File

@ -198,7 +198,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
UpdateActiveConfig();
// Create command buffers. We do this separately because the other classes depend on it.
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading);
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading, g_ActiveConfig.bVSyncActive);
if (!g_command_buffer_mgr->Initialize())
{
PanicAlertFmt("Failed to create Vulkan command buffers");

View File

@ -106,6 +106,8 @@ add_library(videocommon
OnScreenUIKeyMap.h
OpcodeDecoding.cpp
OpcodeDecoding.h
PendingTimeOffset.cpp
PendingTimeOffset.h
PerfQueryBase.cpp
PerfQueryBase.h
PerformanceMetrics.cpp

View File

@ -0,0 +1,4 @@
#include "PendingTimeOffset.h"
// Definition of the shared instance that PendingTimeOffset.h declares as `extern`.
PendingTimeOffset s_pending_time_offset;

View File

@ -0,0 +1,15 @@
#pragma once
#include <mutex>
#include "Common/CommonTypes.h"
// Target duration, in microseconds, that the Vulkan present wait should block for when
// vsync is active. The measured block duration is clamped to twice this value, and its
// difference from this target is what gets stored as the pending offset.
constexpr u64 TARGET_VSYNC_BLOCK_US = 2'000;

// A time offset shared between threads, bundled with the mutex that guards it.
// It is written by the Vulkan present-wait code and consumed incrementally by
// CoreTimingManager::Throttle, which subtracts whatever portion it has applied.
struct PendingTimeOffset
{
  // Must be held for every read or write of Offset__.
  std::mutex Lock;

  // Remaining correction to apply to the throttle deadline; zero means "no correction".
  // Value-initialized so that an instance without static storage duration does not start
  // with an indeterminate duration.
  // NOTE(review): the double underscore makes this a reserved identifier in C++; it is
  // kept as-is because other files reference it by this name.
  DT Offset__{};
};

// Single shared instance; defined in PendingTimeOffset.cpp.
extern PendingTimeOffset s_pending_time_offset;