Add closed-loop latency control for Vulkan vsync
Depends on VK_KHR_present_wait.
This commit is contained in:
parent
d7e2edeaa2
commit
73b5fa6810
|
@ -26,6 +26,9 @@
|
||||||
#include "VideoCommon/VideoBackendBase.h"
|
#include "VideoCommon/VideoBackendBase.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
|
// TODO: Factor the video backpressure state out of the global s_pending_time_offset and into a shared_ptr<PendingTimeOffset>.
|
||||||
|
#include "VideoCommon/PendingTimeOffset.h"
|
||||||
|
|
||||||
namespace CoreTiming
|
namespace CoreTiming
|
||||||
{
|
{
|
||||||
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
|
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
|
||||||
|
@ -346,6 +349,8 @@ void CoreTimingManager::Advance()
|
||||||
|
|
||||||
void CoreTimingManager::Throttle(const s64 target_cycle)
|
void CoreTimingManager::Throttle(const s64 target_cycle)
|
||||||
{
|
{
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
// Based on number of cycles and emulation speed, increase the target deadline
|
// Based on number of cycles and emulation speed, increase the target deadline
|
||||||
const s64 cycles = target_cycle - m_throttle_last_cycle;
|
const s64 cycles = target_cycle - m_throttle_last_cycle;
|
||||||
|
|
||||||
|
@ -358,9 +363,38 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
|
||||||
const double speed =
|
const double speed =
|
||||||
Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
|
Core::GetIsThrottlerTempDisabled() ? 0.0 : Config::Get(Config::MAIN_EMULATION_SPEED);
|
||||||
|
|
||||||
if (0.0 < speed)
|
DT applied_offset = DT::zero();
|
||||||
m_throttle_deadline +=
|
|
||||||
std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
|
if (0.0 < speed) {
|
||||||
|
ASSERT(cycles >= 0);
|
||||||
|
|
||||||
|
const DT sleep_dur = std::chrono::duration_cast<DT>(DT_s(cycles) / (speed * m_throttle_clock_per_sec));
|
||||||
|
|
||||||
|
DT pending_offset;
|
||||||
|
{
|
||||||
|
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||||
|
pending_offset = s_pending_time_offset.Offset__;
|
||||||
|
}
|
||||||
|
if (pending_offset == DT::zero()) {
|
||||||
|
m_throttle_deadline += sleep_dur;
|
||||||
|
} else {
|
||||||
|
// The clocks used to measure vkWaitForPresentKHR() are imprecise and noisy.
|
||||||
|
// We want to exponentially approach vsync at a velocity proportional to
|
||||||
|
// remaining distance, with a time constant equal to 100ms.
|
||||||
|
double velocity = duration_cast<DT_us>(pending_offset) / DT_us(100'000);
|
||||||
|
|
||||||
|
// Limit velocity to at most 0.5ms per (frame≈16ms).
|
||||||
|
constexpr double MAX_VELOCITY = 1. / 32.;
|
||||||
|
velocity = std::clamp(velocity, -MAX_VELOCITY, MAX_VELOCITY);
|
||||||
|
|
||||||
|
constexpr DT MAX_FRAME_DUR = duration_cast<DT>(DT_ms(16));
|
||||||
|
|
||||||
|
// d=vt. Clamp t <= 1 frame to avoid unusual cases.
|
||||||
|
const DT_us distance = velocity * std::min(sleep_dur, MAX_FRAME_DUR);
|
||||||
|
applied_offset = duration_cast<DT>(distance);
|
||||||
|
m_throttle_deadline += sleep_dur + applied_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// A maximum fallback is used to prevent the system from sleeping for
|
// A maximum fallback is used to prevent the system from sleeping for
|
||||||
// too long or going full speed in an attempt to catch up to timings.
|
// too long or going full speed in an attempt to catch up to timings.
|
||||||
|
@ -398,6 +432,11 @@ void CoreTimingManager::Throttle(const s64 target_cycle)
|
||||||
const TimePoint time_after_sleep = Clock::now();
|
const TimePoint time_after_sleep = Clock::now();
|
||||||
g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
|
g_perf_metrics.CountThrottleSleep(time_after_sleep - time);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (applied_offset != DT::zero()) {
|
||||||
|
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||||
|
s_pending_time_offset.Offset__ -= applied_offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CoreTimingManager::ResetThrottle(s64 cycle)
|
void CoreTimingManager::ResetThrottle(s64 cycle)
|
||||||
|
|
|
@ -16,9 +16,13 @@
|
||||||
|
|
||||||
namespace Vulkan
|
namespace Vulkan
|
||||||
{
|
{
|
||||||
// Constructs the manager. The new revision additionally takes the initial vsync state, stores it
// in m_vsync, and resets the shared pending time offset to zero while holding its lock.
CommandBufferManager::CommandBufferManager(bool use_threaded_submission)
|
|
||||||
: m_use_threaded_submission(use_threaded_submission)
|
CommandBufferManager::CommandBufferManager(bool use_threaded_submission, bool vsync)
|
||||||
|
: m_vsync(vsync)
|
||||||
|
, m_use_threaded_submission(use_threaded_submission)
|
||||||
{
|
{
|
||||||
|
// Offset__ is also accessed by other code under this lock, so take it before writing.
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||||
|
// Start from a zero offset (presumably to discard any value left over from an earlier
// instance; confirm).
s_pending_time_offset.Offset__ = DT::zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
CommandBufferManager::~CommandBufferManager()
|
CommandBufferManager::~CommandBufferManager()
|
||||||
|
@ -223,7 +227,7 @@ VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayou
|
||||||
bool CommandBufferManager::CreateSubmitThread()
|
bool CommandBufferManager::CreateSubmitThread()
|
||||||
{
|
{
|
||||||
m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
|
m_submit_thread.Reset("VK submission thread", [this](PendingCommandBufferSubmit submit) {
|
||||||
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
|
SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.vsync,
|
||||||
submit.present_image_index, submit.frame_id);
|
submit.present_image_index, submit.frame_id);
|
||||||
CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
|
CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index];
|
||||||
resources.waiting_for_submit.store(false, std::memory_order_release);
|
resources.waiting_for_submit.store(false, std::memory_order_release);
|
||||||
|
@ -329,14 +333,14 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
|
||||||
{
|
{
|
||||||
resources.waiting_for_submit.store(true, std::memory_order_relaxed);
|
resources.waiting_for_submit.store(true, std::memory_order_relaxed);
|
||||||
// Push to the pending submit queue.
|
// Push to the pending submit queue.
|
||||||
m_submit_thread.Push({present_swap_chain, present_image_index, m_current_cmd_buffer, frame_id});
|
m_submit_thread.Push({present_swap_chain, m_vsync, present_image_index, m_current_cmd_buffer, frame_id});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
WaitForWorkerThreadIdle();
|
WaitForWorkerThreadIdle();
|
||||||
|
|
||||||
// Pass through to normal submission path.
|
// Pass through to normal submission path.
|
||||||
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index, frame_id);
|
SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, m_vsync, present_image_index, frame_id);
|
||||||
if (wait_for_completion)
|
if (wait_for_completion)
|
||||||
WaitForCommandBufferCompletion(m_current_cmd_buffer);
|
WaitForCommandBufferCompletion(m_current_cmd_buffer);
|
||||||
}
|
}
|
||||||
|
@ -389,6 +393,7 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
|
||||||
|
|
||||||
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
|
void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
|
||||||
VkSwapchainKHR present_swap_chain,
|
VkSwapchainKHR present_swap_chain,
|
||||||
|
bool vsync,
|
||||||
u32 present_image_index, u64 frame_id)
|
u32 present_image_index, u64 frame_id)
|
||||||
{
|
{
|
||||||
CmdBufferResources& resources = m_command_buffers[command_buffer_index];
|
CmdBufferResources& resources = m_command_buffers[command_buffer_index];
|
||||||
|
@ -451,7 +456,7 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
|
||||||
if (g_vulkan_context->SupportsPresentWait())
|
if (g_vulkan_context->SupportsPresentWait())
|
||||||
{
|
{
|
||||||
present_info.pNext = &present_id;
|
present_info.pNext = &present_id;
|
||||||
PresentQueued(frame_id, present_swap_chain);
|
PresentQueued(frame_id, present_swap_chain, vsync);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);
|
m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info);
|
||||||
|
|
|
@ -14,23 +14,28 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Common/CommonTypes.h>
|
||||||
#include <Common/WorkQueueThread.h>
|
#include <Common/WorkQueueThread.h>
|
||||||
#include "Common/BlockingLoop.h"
|
#include "Common/BlockingLoop.h"
|
||||||
#include "Common/Flag.h"
|
#include "Common/Flag.h"
|
||||||
#include "Common/Semaphore.h"
|
#include "Common/Semaphore.h"
|
||||||
|
|
||||||
|
#include "VideoCommon/PendingTimeOffset.h"
|
||||||
#include "VideoBackends/Vulkan/Constants.h"
|
#include "VideoBackends/Vulkan/Constants.h"
|
||||||
|
|
||||||
namespace Vulkan
|
namespace Vulkan
|
||||||
{
|
{
|
||||||
|
|
||||||
class CommandBufferManager
|
class CommandBufferManager
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit CommandBufferManager(bool use_threaded_submission);
|
explicit CommandBufferManager(bool use_threaded_submission, bool vsync);
|
||||||
~CommandBufferManager();
|
~CommandBufferManager();
|
||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
|
|
||||||
|
// Updates the vsync flag that is passed along with subsequent command buffer submissions.
void SetVSync(bool vsync) { m_vsync = vsync; }
|
||||||
|
|
||||||
// These command buffers are allocated per-frame. They are valid until the command buffer
|
// These command buffers are allocated per-frame. They are valid until the command buffer
|
||||||
// is submitted, after that you should call these functions again.
|
// is submitted, after that you should call these functions again.
|
||||||
VkCommandBuffer GetCurrentInitCommandBuffer()
|
VkCommandBuffer GetCurrentInitCommandBuffer()
|
||||||
|
@ -102,6 +107,7 @@ private:
|
||||||
|
|
||||||
void WaitForCommandBufferCompletion(u32 command_buffer_index);
|
void WaitForCommandBufferCompletion(u32 command_buffer_index);
|
||||||
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
|
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
|
||||||
|
bool vsync,
|
||||||
u32 present_image_index, u64 frame_id);
|
u32 present_image_index, u64 frame_id);
|
||||||
void BeginCommandBuffer();
|
void BeginCommandBuffer();
|
||||||
|
|
||||||
|
@ -138,6 +144,7 @@ private:
|
||||||
return m_command_buffers[m_current_cmd_buffer];
|
return m_command_buffers[m_current_cmd_buffer];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool m_vsync;
|
||||||
u64 m_next_fence_counter = 1;
|
u64 m_next_fence_counter = 1;
|
||||||
u64 m_completed_fence_counter = 0;
|
u64 m_completed_fence_counter = 0;
|
||||||
|
|
||||||
|
@ -150,6 +157,7 @@ private:
|
||||||
struct PendingCommandBufferSubmit
|
struct PendingCommandBufferSubmit
|
||||||
{
|
{
|
||||||
VkSwapchainKHR present_swap_chain;
|
VkSwapchainKHR present_swap_chain;
|
||||||
|
bool vsync;
|
||||||
u32 present_image_index;
|
u32 present_image_index;
|
||||||
u32 command_buffer_index;
|
u32 command_buffer_index;
|
||||||
u64 frame_id;
|
u64 frame_id;
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include "Common/WorkQueueThread.h"
|
#include "Common/WorkQueueThread.h"
|
||||||
|
#include "VideoCommon/PendingTimeOffset.h"
|
||||||
|
|
||||||
#include "VideoBackends/Vulkan/VulkanContext.h"
|
#include "VideoBackends/Vulkan/VulkanContext.h"
|
||||||
#include "VideoBackends/Vulkan/VulkanLoader.h"
|
#include "VideoBackends/Vulkan/VulkanLoader.h"
|
||||||
|
@ -22,17 +23,25 @@ struct Wait
|
||||||
{
|
{
|
||||||
u64 present_id;
|
u64 present_id;
|
||||||
VkSwapchainKHR swapchain;
|
VkSwapchainKHR swapchain;
|
||||||
|
bool vsync;
|
||||||
};
|
};
|
||||||
|
|
||||||
static Common::WorkQueueThread<Wait> s_present_wait_thread;
|
static Common::WorkQueueThread<Wait> s_present_wait_thread;
|
||||||
|
|
||||||
void WaitFunction(Wait wait)
|
static void WaitFunction(Wait wait)
|
||||||
{
|
{
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
const TimePoint begin = Clock::now();
|
||||||
|
|
||||||
// We choose a timeout of 20ms so that we can periodically re-check the loop condition (IsCancelling).
|
// We choose a timeout of 20ms so that we can periodically re-check the loop condition (IsCancelling).
|
||||||
VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
|
VkResult res = vkWaitForPresentKHR(s_device, wait.swapchain, wait.present_id, 20'000'000);
|
||||||
|
|
||||||
|
const TimePoint end = Clock::now();
|
||||||
|
DT block_dur = end - begin;
|
||||||
|
|
||||||
if (res == VK_TIMEOUT)
|
if (res == VK_TIMEOUT)
|
||||||
{
|
{
|
||||||
WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
|
WARN_LOG_FMT(VIDEO, "vkWaitForPresentKHR timed out, retrying {}", wait.present_id);
|
||||||
|
@ -47,6 +56,20 @@ void WaitFunction(Wait wait)
|
||||||
if (res == VK_SUCCESS)
|
if (res == VK_SUCCESS)
|
||||||
g_perf_metrics.CountPresent();
|
g_perf_metrics.CountPresent();
|
||||||
|
|
||||||
|
if (wait.vsync)
|
||||||
|
{
|
||||||
|
block_dur = std::min(block_dur, duration_cast<DT>(DT_us(2 * TARGET_VSYNC_BLOCK_US)));
|
||||||
|
|
||||||
|
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||||
|
|
||||||
|
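// NOTE: This relies on DT having a signed representation: the offset is negative whenever
// we blocked for less than the target duration.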
|
||||||
|
s_pending_time_offset.Offset__ =
|
||||||
|
block_dur - duration_cast<DT>(DT_us(TARGET_VSYNC_BLOCK_US));
|
||||||
|
} else {
|
||||||
|
std::unique_lock lk{s_pending_time_offset.Lock};
|
||||||
|
s_pending_time_offset.Offset__ = DT::zero();
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
} while (!s_present_wait_thread.IsCancelling());
|
} while (!s_present_wait_thread.IsCancelling());
|
||||||
}
|
}
|
||||||
|
@ -62,9 +85,9 @@ void StopPresentWaitThread()
|
||||||
s_present_wait_thread.Shutdown();
|
s_present_wait_thread.Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Queues a Wait item for the given present ID and swapchain on the present-wait worker thread.
// In the new revision the vsync flag is stored in the Wait item as well, where WaitFunction
// uses it to decide whether to record a measured offset or reset the offset to zero.
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain)
|
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync)
|
||||||
{
|
{
|
||||||
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain});
|
s_present_wait_thread.EmplaceItem(Wait{present_id, swapchain, vsync});
|
||||||
}
|
}
|
||||||
|
|
||||||
void FlushPresentWaitQueue()
|
void FlushPresentWaitQueue()
|
||||||
|
|
|
@ -12,7 +12,7 @@ namespace Vulkan
|
||||||
void StartPresentWaitThread();
|
void StartPresentWaitThread();
|
||||||
void StopPresentWaitThread();
|
void StopPresentWaitThread();
|
||||||
|
|
||||||
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain);
|
void PresentQueued(u64 present_id, VkSwapchainKHR swapchain, bool vsync);
|
||||||
void FlushPresentWaitQueue();
|
void FlushPresentWaitQueue();
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -395,6 +395,7 @@ void VKGfx::OnConfigChanged(u32 bits)
|
||||||
if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
|
if (m_swap_chain && (bits & CONFIG_CHANGE_BIT_VSYNC))
|
||||||
{
|
{
|
||||||
ExecuteCommandBuffer(false, true);
|
ExecuteCommandBuffer(false, true);
|
||||||
|
g_command_buffer_mgr->SetVSync(g_ActiveConfig.bVSyncActive);
|
||||||
m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
|
m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -198,7 +198,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
|
||||||
UpdateActiveConfig();
|
UpdateActiveConfig();
|
||||||
|
|
||||||
// Create command buffers. We do this separately because the other classes depend on it.
|
// Create command buffers. We do this separately because the other classes depend on it.
|
||||||
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading);
|
g_command_buffer_mgr = std::make_unique<CommandBufferManager>(g_Config.bBackendMultithreading, g_ActiveConfig.bVSyncActive);
|
||||||
if (!g_command_buffer_mgr->Initialize())
|
if (!g_command_buffer_mgr->Initialize())
|
||||||
{
|
{
|
||||||
PanicAlertFmt("Failed to create Vulkan command buffers");
|
PanicAlertFmt("Failed to create Vulkan command buffers");
|
||||||
|
|
|
@ -106,6 +106,8 @@ add_library(videocommon
|
||||||
OnScreenUIKeyMap.h
|
OnScreenUIKeyMap.h
|
||||||
OpcodeDecoding.cpp
|
OpcodeDecoding.cpp
|
||||||
OpcodeDecoding.h
|
OpcodeDecoding.h
|
||||||
|
PendingTimeOffset.cpp
|
||||||
|
PendingTimeOffset.h
|
||||||
PerfQueryBase.cpp
|
PerfQueryBase.cpp
|
||||||
PerfQueryBase.h
|
PerfQueryBase.h
|
||||||
PerformanceMetrics.cpp
|
PerformanceMetrics.cpp
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
#include "PendingTimeOffset.h"
|
||||||
|
|
||||||
|
// Definition of the global instance declared extern in PendingTimeOffset.h.
|
||||||
|
PendingTimeOffset s_pending_time_offset;
|
|
@ -0,0 +1,15 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "Common/CommonTypes.h"
|
||||||
|
|
||||||
|
// Target duration, in microseconds, for which vkWaitForPresentKHR() should block when vsync is
// active. The measured block duration (clamped to twice this value) minus this target is what
// the present-wait thread stores as the pending time offset.
constexpr u64 TARGET_VSYNC_BLOCK_US = 2'000;
|
||||||
|
|
||||||
|
// Shared state passing the vsync timing error measured by the Vulkan present-wait thread to
// the CoreTiming throttle, which gradually applies it to the throttle deadline.
struct PendingTimeOffset
|
||||||
|
{
|
||||||
|
// Held (via std::unique_lock) around every read and write of Offset__ in the visible code.
std::mutex Lock;
|
||||||
|
// Remaining timing error still to be applied: positive when the present wait blocked longer
// than the target, negative when it blocked for less. The throttle subtracts whatever portion
// it has applied; it is reset to zero when vsync is off or a CommandBufferManager is created.
DT Offset__;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Global instance shared between the throttle and the Vulkan backend; defined in
// PendingTimeOffset.cpp.
extern PendingTimeOffset s_pending_time_offset;
|
Loading…
Reference in New Issue