Add closed-loop latency control for Vulkan vsync
Depends on VK_KHR_present_wait.
This commit is contained in:
parent
d7e2edeaa2
commit
73b5fa6810
|
@ -26,6 +26,9 @@
|
|||
#include "VideoCommon/VideoBackendBase.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
|
||||
// TODO: Pass video backpressure in through a shared_ptr<PendingTimeOffset> instead of
// reaching into the VideoCommon global (s_pending_time_offset).
|
||||
#include "VideoCommon/PendingTimeOffset.h"
|
||||
|
||||
namespace CoreTiming
|
||||
{
|
||||
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
|
||||
|
@ -346,6 +349,8 @@ void CoreTimingManager::Advance()
|
|||
|
||||
void CoreTimingManager::Throttle(const s64 target_cycle)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
// Based on number of cycles and emulation speed, increase the target deadline
|
||||
const s64 cycles = target_cycle - m_throttle_last_cycle;
|
||||
|
||||
|
@ -358,9 +363,38 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
|
|||
const double speed =
|
||||
Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
|
||||
|
||||
if (0.0 < speed)
|
||||
m_throttle_deadline +=
|
||||
std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
|
||||
DT applied_offset = DT::zero();
|
||||
|
||||
if (0.0 < speed) {
|
||||
ASSERT(cycles >= 0);
|
||||
|
||||
const DT sleep_dur = std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
|
||||
|
||||
DT pending_offset;
|
||||
{
|
||||
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||
pending_offset = s_pending_time_offset.Offset__;
|
||||
}
|
||||
if (pending_offset == DT::zero()) {
|
||||
m_throttle_deadline += sleep_dur;
|
||||
} else {
|
||||
// The clocks used to measure vkWaitForPresentKHR() are imprecise and noisy.
|
||||
// We want to exponentially approach vsync at a velocity proportional to
|
||||
// remaining distance, with a time constant equal to 100ms.
|
||||
double velocity = duration_cast<DT_us>(pending_offset) / DT_us(100'000);
|
||||
|
||||
// Limit velocity to at most 0.5ms per (frame≈16ms).
|
||||
constexpr double MAX_VELOCITY = 1. / 32.;
|
||||
velocity = std::clamp(velocity, -MAX_VELOCITY, MAX_VELOCITY);
|
||||
|
||||
constexpr DT MAX_FRAME_DUR = duration_cast<DT>(DT_ms(16));
|
||||
|
||||
// d=vt. Clamp t <= 1 frame to avoid unusual cases.
|
||||
const DT_us distance = velocity * std::min(sleep_dur, MAX_FRAME_DUR);
|
||||
applied_offset = duration_cast<DT>(distance);
|
||||
m_throttle_deadline += sleep_dur + applied_offset;
|
||||
}
|
||||
}
|
||||
|
||||
// A maximum fallback is used to prevent the system from sleeping for
|
||||
// too long or going full speed in an attempt to catch up to timings.
|
||||
|
@ -398,6 +432,11 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
|
|||
const TimePoint time_after_sleep = Clock::now();
|
||||
g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
|
||||
}
|
||||
|
||||
if (applied_offset != DT::zero()) {
|
||||
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||
s_pending_time_offset.Offset__ -= applied_offset;
|
||||
}
|
||||
}
|
||||
|
||||
void CoreTimingManager::ResetThrottle(s64 cycle)
|
||||
|
|
|
@ -16,9 +16,13 @@
|
|||
|
||||
namespace Vulkan
|
||||
{
|
||||
CommandBufferManager::CommandBufferManager(bool use_threaded_submission)
|
||||
: m_use_threaded_submission(use_threaded_submission)
|
||||
|
||||
CommandBufferManager::CommandBufferManager(bool use_threaded_submission, bool vsync)
|
||||
: m_vsync(vsync)
|
||||
, m_use_threaded_submission(use_threaded_submission)
|
||||
{
|
||||
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||
s_pending_time_offset.Offset__ = DT::zero();
|
||||
}
|
||||
|
||||
CommandBufferManager::~CommandBufferManager()
|
||||
|
@ -223,7 +227,7 @@ VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayou
|
|||
bool CommandBufferManager::CreateSubmitThread()
|
||||
{
|
||||
m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
|
||||
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
|
||||
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.vsync,
|
||||
submit.present_image_index, submit.frame_id);
|
||||
CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
|
||||
resources.waiting_for_submit.store(false, std::memory_order_release);
|
||||
|
@ -329,14 +333,14 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
|
|||
{
|
||||
resources.waiting_for_submit.store(true, std::memory_order_relaxed);
|
||||
// Push to the pending submit queue.
|
||||
m_submit_thread.Push({present_swap_chain, present_image_index, m_current_cmd_buffer, frame_id});
|
||||
m_submit_thread.Push({present_swap_chain, m_vsync, present_image_index, m_current_cmd_buffer, frame_id});
|
||||
}
|
||||
else
|
||||
{
|
||||
WaitForWorkerThreadIdle();
|
||||
|
||||
// Pass through to normal submission path.
|
||||
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index, frame_id);
|
||||
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, m_vsync, present_image_index, frame_id);
|
||||
if (wait_for_completion)
|
||||
WaitForCommandBufferCompletion(m_current_cmd_buffer);
|
||||
}
|
||||
|
@ -389,6 +393,7 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
|
|||
|
||||
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
|
||||
VkSwapchainKHR present_swap_chain,
|
||||
bool vsync,
|
||||
u32 present_image_index, u64 frame_id)
|
||||
{
|
||||
CmdBufferResources& resources = m_command_buffers[command_buffer_index];
|
||||
|
@ -451,7 +456,7 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
|
|||
if (g_vulkan_context->SupportsPresentWait())
|
||||
{
|
||||
present_info.pNext = &present_id;
|
||||
PresentQueued(frame_id, present_swap_chain);
|
||||
PresentQueued(frame_id, present_swap_chain, vsync);
|
||||
}
|
||||
|
||||
m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);
|
||||
|
|
|
@ -14,23 +14,28 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <Common/CommonTypes.h>
|
||||
#include <Common/WorkQueueThread.h>
|
||||
#include "Common/BlockingLoop.h"
|
||||
#include "Common/Flag.h"
|
||||
#include "Common/Semaphore.h"
|
||||
|
||||
#include "VideoCommon/PendingTimeOffset.h"
|
||||
#include "VideoBackends/Vulkan/Constants.h"
|
||||
|
||||
namespace Vulkan
|
||||
{
|
||||
|
||||
class CommandBufferManager
|
||||
{
|
||||
public:
|
||||
explicit CommandBufferManager(bool use_threaded_submission);
|
||||
explicit CommandBufferManager(bool use_threaded_submission, bool vsync);
|
||||
~CommandBufferManager();
|
||||
|
||||
bool Initialize();
|
||||
|
||||
void SetVSync(bool vsync) { m_vsync = vsync; }
|
||||
|
||||
// These command buffers are allocated per-frame. They are valid until the command buffer
|
||||
// is submitted, after that you should call these functions again.
|
||||
VkCommandBuffer GetCurrentInitCommandBuffer()
|
||||
|
@ -102,6 +107,7 @@ private:
|
|||
|
||||
void WaitForCommandBufferCompletion(u32 command_buffer_index);
|
||||
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
|
||||
bool vsync,
|
||||
u32 present_image_index, u64 frame_id);
|
||||
void BeginCommandBuffer();
|
||||
|
||||
|
@ -138,6 +144,7 @@ private:
|
|||
return m_command_buffers[m_current_cmd_buffer];
|
||||
}
|
||||
|
||||
bool m_vsync;
|
||||
u64 m_next_fence_counter = 1;
|
||||
u64 m_completed_fence_counter = 0;
|
||||
|
||||
|
@ -150,6 +157,7 @@ private:
|
|||
struct PendingCommandBufferSubmit
|
||||
{
|
||||
VkSwapchainKHR present_swap_chain;
|
||||
bool vsync;
|
||||
u32 present_image_index;
|
||||
u32 command_buffer_index;
|
||||
u64 frame_id;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <tuple>
|
||||
|
||||
#include "Common/WorkQueueThread.h"
|
||||
#include "VideoCommon/PendingTimeOffset.h"
|
||||
|
||||
#include "VideoBackends/Vulkan/VulkanContext.h"
|
||||
#include "VideoBackends/Vulkan/VulkanLoader.h"
|
||||
|
@ -22,17 +23,25 @@ struct Wait
|
|||
{
|
||||
u64 present_id;
|
||||
VkSwapchainKHR swapchain;
|
||||
bool vsync;
|
||||
};
|
||||
|
||||
static Common::WorkQueueThread<Wait> s_present_wait_thread;
|
||||
|
||||
void WaitFunction(Wait wait)
|
||||
static void WaitFunction(Wait wait)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
do
|
||||
{
|
||||
const TimePoint begin = Clock::now();
|
||||
|
||||
// We choose a timeout of 20ms (passed in nanoseconds) so that the loop can periodically
// re-check IsCancelling() instead of blocking indefinitely.
|
||||
VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
|
||||
|
||||
const TimePoint end = Clock::now();
|
||||
DT block_dur = end - begin;
|
||||
|
||||
if (res == VK_TIMEOUT)
|
||||
{
|
||||
WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
|
||||
|
@ -47,6 +56,20 @@ void WaitFunction(Wait wait)
|
|||
if (res == VK_SUCCESS)
|
||||
g_perf_metrics.CountPresent();
|
||||
|
||||
if (wait.vsync)
|
||||
{
|
||||
block_dur = std::min(block_dur, duration_cast<DT>(DT_us(2 * TARGET_VSYNC_BLOCK_US)));
|
||||
|
||||
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||
|
||||
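// The offset is negative whenever the present blocked for less than the target, so this
// relies on DT having a signed representation (assumed here, not checked).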
|
||||
s_pending_time_offset.Offset__ =
|
||||
block_dur - duration_cast<DT>(DT_us(TARGET_VSYNC_BLOCK_US));
|
||||
} else {
|
||||
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||
s_pending_time_offset.Offset__ = DT::zero();
|
||||
}
|
||||
|
||||
return;
|
||||
} while (!s_present_wait_thread.IsCancelling());
|
||||
}
|
||||
|
@ -62,9 +85,9 @@ void StopPresentWaitThread()
|
|||
s_present_wait_thread.Shutdown();
|
||||
}
|
||||
|
||||
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain)
|
||||
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync)
|
||||
{
|
||||
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain});
|
||||
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain, vsync});
|
||||
}
|
||||
|
||||
void FlushPresentWaitQueue()
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace Vulkan
|
|||
void StartPresentWaitThread();
|
||||
void StopPresentWaitThread();
|
||||
|
||||
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain);
|
||||
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync);
|
||||
void FlushPresentWaitQueue();
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -395,6 +395,7 @@ void VKGfx::OnConfigChanged(u32 bits)
|
|||
if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
|
||||
{
|
||||
ExecuteCommandBuffer(false, true);
|
||||
g_command_buffer_mgr->SetVSync(g_ActiveConfig.bVSyncActive);
|
||||
m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
|
||||
}
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
|
|||
UpdateActiveConfig();
|
||||
|
||||
// Create command buffers. We do this separately because the other classes depend on it.
|
||||
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading);
|
||||
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading, g_ActiveConfig.bVSyncActive);
|
||||
if (!g_command_buffer_mgr->Initialize())
|
||||
{
|
||||
PanicAlertFmt("Failed to create Vulkan command buffers");
|
||||
|
|
|
@ -106,6 +106,8 @@ add_library(videocommon
|
|||
OnScreenUIKeyMap.h
|
||||
OpcodeDecoding.cpp
|
||||
OpcodeDecoding.h
|
||||
PendingTimeOffset.cpp
|
||||
PendingTimeOffset.h
|
||||
PerfQueryBase.cpp
|
||||
PerfQueryBase.h
|
||||
PerformanceMetrics.cpp
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
#include "PendingTimeOffset.h"
|
||||
|
||||
// Definition of the shared instance that PendingTimeOffset.h declares `extern`.
|
||||
PendingTimeOffset s_pending_time_offset;
|
|
@ -0,0 +1,15 @@
|
|||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
// Target duration, in microseconds, for the present-wait thread to spend blocked inside
// vkWaitForPresentKHR() per present while vsync is enabled. The measured block time is
// clamped to twice this value, and (measured - target) is published as the pending offset.
constexpr u64 TARGET_VSYNC_BLOCK_US = 2'000;
|
||||
|
||||
struct PendingTimeOffset
|
||||
{
|
||||
std::mutex Lock;
|
||||
DT Offset__;
|
||||
};
|
||||
|
||||
extern PendingTimeOffset s_pending_time_offset;
|
Loading…
Reference in New Issue