GS/Vulkan: Purge threaded presentation

This commit is contained in:
Stenzek 2024-05-26 13:01:58 +10:00 committed by Connor McLaughlin
parent d94f1dd9a3
commit c94282ce5f
8 changed files with 40 additions and 206 deletions

View File

@ -227,7 +227,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useDebugDevice, "EmuCore/GS", "UseDebugDevice", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.skipPresentingDuplicateFrames, "EmuCore/GS", "SkipDuplicateFrames", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableMailboxPresentation, "EmuCore/GS", "DisableMailboxPresentation", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.exclusiveFullscreenControl, "EmuCore/GS", "ExclusiveFullscreenControl", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(
@ -335,7 +334,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
m_ui.useBlitSwapChain = nullptr;
m_ui.disableMailboxPresentation = nullptr;
m_ui.skipPresentingDuplicateFrames = nullptr;
m_ui.threadedPresentation = nullptr;
m_ui.overrideTextureBarriers = nullptr;
m_ui.disableFramebufferFetch = nullptr;
m_ui.disableShaderCache = nullptr;
@ -767,11 +765,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
tr("Forces the use of FIFO over Mailbox presentation, i.e. double buffering instead of triple buffering. "
"Usually results in worse frame pacing."));
dialog->registerWidgetHelp(m_ui.threadedPresentation, tr("Disable Threaded Presentation"), tr("Unchecked"),
tr("Presents frames on the main GS thread instead of a worker thread. Used for debugging frametime issues. "
"Could reduce chance of missing a frame or reduce tearing at the expense of more erratic frame times. "
"Only applies to the Vulkan renderer."));
dialog->registerWidgetHelp(m_ui.useDebugDevice, tr("Enable Debug Device"), tr("Unchecked"),
tr("Enables API-level validation of graphics commands."));

View File

@ -2052,13 +2052,6 @@
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="threadedPresentation">
<property name="text">
<string>Disable Threaded Presentation</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="disableMailboxPresentation">
<property name="text">
<string>Disable Mailbox Presentation</string>

View File

@ -602,7 +602,6 @@ struct Pcsx2Config
DisableShaderCache : 1,
DisableFramebufferFetch : 1,
DisableVertexShaderExpand : 1,
DisableThreadedPresentation : 1,
SkipDuplicateFrames : 1,
OsdShowMessages : 1,
OsdShowSpeed : 1,

View File

@ -1008,7 +1008,6 @@ void GSDeviceVK::WaitForFenceCounter(u64 fence_counter)
// Blocks the caller until the device has gone idle.
// WaitForPresentComplete() is called first so that any submission still pending
// on the present worker has been issued before vkDeviceWaitIdle() runs on m_device.
// The VkResult returned by vkDeviceWaitIdle() is not checked here.
void GSDeviceVK::WaitForGPUIdle()
{
WaitForPresentComplete();
vkDeviceWaitIdle(m_device);
}
@ -1050,19 +1049,12 @@ void GSDeviceVK::ScanForCommandBufferCompletion()
void GSDeviceVK::WaitForCommandBufferCompletion(u32 index)
{
// We might be waiting for the buffer we just submitted to the worker thread.
if (m_queued_present.command_buffer_index == index && !m_present_done.load(std::memory_order_acquire))
{
Console.WarningFmt("Waiting for threaded submission of cmdbuffer {}", index);
WaitForPresentComplete();
}
// Wait for this command buffer to be completed.
const VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}
@ -1085,8 +1077,7 @@ void GSDeviceVK::WaitForCommandBufferCompletion(u32 index)
m_completed_fence_counter = now_completed_counter;
}
void GSDeviceVK::SubmitCommandBuffer(
VKSwapChain* present_swap_chain /* = nullptr */, bool submit_on_thread /* = false */)
void GSDeviceVK::SubmitCommandBuffer(VKSwapChain* present_swap_chain)
{
FrameResources& resources = m_frame_resources[m_current_frame];
@ -1154,33 +1145,9 @@ void GSDeviceVK::SubmitCommandBuffer(
if (spin_cycles != 0)
WaitForSpinCompletion(m_current_frame);
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
if (spin_enabled && m_optional_extensions.vk_ext_calibrated_timestamps)
resources.submit_timestamp = GetCPUTimestamp();
// Don't use threaded presentation when spinning is enabled. ScanForCommandBufferCompletion()
// calls vkGetFenceStatus(), which reads a fence that has been passed off to the thread.
if (!submit_on_thread || GSConfig.HWSpinGPUForReadbacks || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, present_swap_chain, spin_cycles);
if (present_swap_chain)
DoPresent(present_swap_chain);
return;
}
m_queued_present.command_buffer_index = m_current_frame;
m_queued_present.swap_chain = present_swap_chain;
m_queued_present.spin_cycles = spin_cycles;
m_present_done.store(false, std::memory_order_release);
m_present_queued_cv.notify_one();
}
void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chain, u32 spin_cycles)
{
FrameResources& resources = m_frame_resources[index];
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSemaphore semas[2];
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
@ -1197,7 +1164,7 @@ void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chai
if (spin_cycles != 0)
{
semas[0] = present_swap_chain->GetRenderingFinishedSemaphore();
semas[1] = m_spin_resources[index].semaphore;
semas[1] = m_spin_resources[m_current_frame].semaphore;
submit_info.signalSemaphoreCount = 2;
submit_info.pSignalSemaphores = semas;
}
@ -1210,105 +1177,44 @@ void GSDeviceVK::DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chai
else if (spin_cycles != 0)
{
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &m_spin_resources[index].semaphore;
submit_info.pSignalSemaphores = &m_spin_resources[m_current_frame].semaphore;
}
const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}
if (spin_cycles != 0)
SubmitSpinCommand(index, spin_cycles);
}
SubmitSpinCommand(m_current_frame, spin_cycles);
void GSDeviceVK::DoPresent(VKSwapChain* present_swap_chain)
{
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1,
present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetCurrentImageIndexPtr(), nullptr};
present_swap_chain->ReleaseCurrentImage();
const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
if (present_swap_chain)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1,
present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1, present_swap_chain->GetSwapChainPtr(),
present_swap_chain->GetCurrentImageIndexPtr(), nullptr};
m_last_present_failed.store(true, std::memory_order_release);
return;
present_swap_chain->ReleaseCurrentImage();
const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
m_last_present_failed = true;
return;
}
// Grab the next image as soon as possible, that way we spend less time blocked on the next
// submission. Don't care if it fails, we'll deal with that at the presentation call site.
// Credit to dxvk for the idea.
present_swap_chain->AcquireNextImage();
}
// Grab the next image as soon as possible, that way we spend less time blocked on the next
// submission. Don't care if it fails, we'll deal with that at the presentation call site.
// Credit to dxvk for the idea.
present_swap_chain->AcquireNextImage();
}
// Blocks until m_present_done reads true.
// Fast path: if the flag is already set, returns without touching m_present_mutex.
// Otherwise takes m_present_mutex and defers to the overload that waits on the
// condition variable with that lock.
void GSDeviceVK::WaitForPresentComplete()
{
if (m_present_done.load(std::memory_order_acquire))
return;
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
}
// Waits on m_present_done_cv, using the caller's lock, until m_present_done is true.
// `lock` is handed straight to the condition variable wait, so it must already be
// locked; every caller visible here constructs it on m_present_mutex.
// Returns immediately (without waiting) when the flag is already set.
void GSDeviceVK::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
{
if (m_present_done.load(std::memory_order_acquire))
return;
// The predicate form re-checks the flag after every wakeup, so spurious wakeups are harmless.
m_present_done_cv.wait(lock, [this]() { return m_present_done.load(std::memory_order_acquire); });
}
// Body of the present worker thread.
// Loops until m_present_thread_done is set. Each iteration sleeps on
// m_present_queued_cv until either work is pending (m_present_done == false) or
// shutdown was requested, then submits the command buffer described by
// m_queued_present, presents it if a swap chain was supplied, and finally marks the
// work done and wakes a waiter on m_present_done_cv.
// m_present_mutex is held for the whole loop except while blocked inside the wait.
void GSDeviceVK::PresentThread()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
while (!m_present_thread_done.load(std::memory_order_acquire))
{
// Wake up when a present has been queued, or when asked to exit.
m_present_queued_cv.wait(lock, [this]() {
return !m_present_done.load(std::memory_order_acquire) ||
m_present_thread_done.load(std::memory_order_acquire);
});
// Nothing queued: we were woken for shutdown, so go back and re-test the loop condition.
if (m_present_done.load(std::memory_order_acquire))
continue;
DoSubmitCommandBuffer(
m_queued_present.command_buffer_index, m_queued_present.swap_chain, m_queued_present.spin_cycles);
// A null swap chain means this was a submit-only request.
if (m_queued_present.swap_chain)
DoPresent(m_queued_present.swap_chain);
// Publish completion, then wake one thread blocked in WaitForPresentComplete().
m_present_done.store(true, std::memory_order_release);
m_present_done_cv.notify_one();
}
}
// Launches the present worker thread running PresentThread().
// Asserts that no worker is currently joinable, and clears the exit flag before
// the thread is created so the worker's loop condition starts out false.
void GSDeviceVK::StartPresentThread()
{
pxAssert(!m_present_thread.joinable());
m_present_thread_done.store(false, std::memory_order_release);
m_present_thread = std::thread(&GSDeviceVK::PresentThread, this);
}
// Shuts down the present worker thread, if one is running.
// Any queued present is allowed to finish first; the exit flag is then set and the
// worker is woken so it can observe the flag and leave its loop.
void GSDeviceVK::StopPresentThread()
{
// No worker was started (or it has already been joined): nothing to do.
if (!m_present_thread.joinable())
return;
// Scoped so m_present_mutex is released before join(): the worker has to
// reacquire the mutex to return from its condition variable wait.
{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
m_present_thread_done.store(true, std::memory_order_release);
m_present_queued_cv.notify_one();
}
m_present_thread.join();
}
void GSDeviceVK::CommandBufferCompleted(u32 index)
@ -1411,12 +1317,11 @@ void GSDeviceVK::ActivateCommandBuffer(u32 index)
void GSDeviceVK::ExecuteCommandBuffer(WaitType wait_for_completion)
{
if (m_last_submit_failed.load(std::memory_order_acquire))
if (m_last_submit_failed)
return;
// If we're waiting for completion, don't bother waking the worker thread.
const u32 current_frame = m_current_frame;
SubmitCommandBuffer();
SubmitCommandBuffer(nullptr);
MoveToNextCommandBuffer();
if (wait_for_completion != WaitType::None)
@ -1433,16 +1338,6 @@ void GSDeviceVK::ExecuteCommandBuffer(WaitType wait_for_completion)
}
}
// Returns whether a present failure has been recorded, and clears the record.
// exchange() reads the old value of m_last_present_failed and writes false in a
// single atomic step, so each recorded failure is reported to exactly one caller.
bool GSDeviceVK::CheckLastPresentFail()
{
return m_last_present_failed.exchange(false, std::memory_order_acq_rel);
}
// Returns the current value of m_last_submit_failed.
// This is a plain load: unlike CheckLastPresentFail(), the flag is not cleared,
// so once a submit or fence wait has failed this keeps returning true.
bool GSDeviceVK::CheckLastSubmitFail()
{
return m_last_submit_failed.load(std::memory_order_acquire);
}
void GSDeviceVK::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
{
FrameResources& resources = m_frame_resources[m_current_frame];
@ -1809,7 +1704,7 @@ void GSDeviceVK::WaitForSpinCompletion(u32 index)
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_last_submit_failed = true;
return;
}
SpinCommandCompleted(index);
@ -2169,7 +2064,6 @@ void GSDeviceVK::Destroy()
WaitForGPUIdle();
}
StopPresentThread();
m_swap_chain.reset();
DestroySpinResources();
@ -2334,6 +2228,10 @@ GSDevice::PresentResult GSDeviceVK::BeginPresent(bool frame_skip)
{
EndRenderPass();
// Check if the device was lost.
if (m_last_submit_failed)
return PresentResult::DeviceLost;
if (frame_skip)
return PresentResult::FrameSkipped;
@ -2344,13 +2242,6 @@ GSDevice::PresentResult GSDeviceVK::BeginPresent(bool frame_skip)
return PresentResult::FrameSkipped;
}
// Previous frame needs to be presented before we can acquire the swap chain.
WaitForPresentComplete();
// Check if the device was lost.
if (CheckLastSubmitFail())
return PresentResult::DeviceLost;
VkResult res = m_swap_chain->AcquireNextImage();
if (res != VK_SUCCESS)
{
@ -2422,7 +2313,7 @@ void GSDeviceVK::EndPresent()
m_swap_chain->GetCurrentTexture()->TransitionToLayout(cmdbuffer, GSTextureVK::Layout::PresentSrc);
g_perfmon.Put(GSPerfMon::RenderPasses, 1);
SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
SubmitCommandBuffer(m_swap_chain.get());
MoveToNextCommandBuffer();
InvalidateCachedState();
@ -2621,9 +2512,6 @@ bool GSDeviceVK::CreateDeviceAndSwapChain()
VKShaderCache::Create();
if (!GSConfig.DisableThreadedPresentation)
StartPresentThread();
if (surface != VK_NULL_HANDLE)
{
VkPresentModeKHR present_mode;
@ -4554,7 +4442,7 @@ void GSDeviceVK::RenderBlankFrame()
cmdbuffer, sctex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &s_present_clear_color.color, 1, &srr);
m_swap_chain->GetCurrentTexture()->TransitionToLayout(cmdbuffer, GSTextureVK::Layout::PresentSrc);
SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
SubmitCommandBuffer(m_swap_chain.get());
ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
}

View File

@ -136,7 +136,7 @@ private:
bool EnableDebugUtils();
void DisableDebugUtils();
void SubmitCommandBuffer(VKSwapChain* present_swap_chain = nullptr, bool submit_on_thread = false);
void SubmitCommandBuffer(VKSwapChain* present_swap_chain);
void MoveToNextCommandBuffer();
enum class WaitType
@ -148,11 +148,6 @@ private:
static WaitType GetWaitType(bool wait, bool spin);
void ExecuteCommandBuffer(WaitType wait_for_completion);
void WaitForPresentComplete();
// Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.
bool CheckLastPresentFail();
bool CheckLastSubmitFail();
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
bool AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
@ -195,13 +190,6 @@ private:
void ScanForCommandBufferCompletion();
void WaitForCommandBufferCompletion(u32 index);
void DoSubmitCommandBuffer(u32 index, VKSwapChain* present_swap_chain, u32 spin_cycles);
void DoPresent(VKSwapChain* present_swap_chain);
void WaitForPresentComplete(std::unique_lock<std::mutex>& lock);
void PresentThread();
void StartPresentThread();
void StopPresentThread();
bool InitSpinResources();
void DestroySpinResources();
void WaitForSpinCompletion(u32 index);
@ -283,23 +271,8 @@ private:
u64 m_completed_fence_counter = 0;
u32 m_current_frame = 0;
std::atomic_bool m_last_submit_failed{false};
std::atomic_bool m_last_present_failed{false};
std::atomic_bool m_present_done{true};
std::mutex m_present_mutex;
std::condition_variable m_present_queued_cv;
std::condition_variable m_present_done_cv;
std::thread m_present_thread;
std::atomic_bool m_present_thread_done{false};
// Describes the one submission handed from SubmitCommandBuffer() to the present
// worker thread, which forwards these fields to DoSubmitCommandBuffer()/DoPresent().
struct QueuedPresent
{
// Swap chain to present to after submitting; nullptr means submit without presenting.
VKSwapChain* swap_chain;
// Index into m_frame_resources of the command buffer to submit.
u32 command_buffer_index;
// Spin cycle count forwarded to DoSubmitCommandBuffer(); a non-zero value also submits a spin command.
u32 spin_cycles;
};
QueuedPresent m_queued_present = {nullptr, 0xFFFFFFFFu, 0};
bool m_last_submit_failed = false;
bool m_last_present_failed = false;
std::map<u32, VkRenderPass> m_render_pass_cache;

View File

@ -65,10 +65,6 @@ public:
return &m_semaphores[m_current_semaphore].rendering_finished_semaphore;
}
// Returns true if the current present mode is synchronizing.
__fi bool IsPresentModeSynchronizing() const { return (m_present_mode == VK_PRESENT_MODE_FIFO_KHR); }
__fi VkPresentModeKHR GetPresentMode() const { return m_present_mode; }
VkFormat GetTextureFormat() const;
VkResult AcquireNextImage();
void ReleaseCurrentImage();

View File

@ -3900,9 +3900,6 @@ void FullscreenUI::DrawGraphicsSettingsPage(SettingsInterface* bsi, bool show_ad
FSUI_CSTR("Skips displaying frames that don't change in 25/30fps games. Can improve speed, but increase input lag/make frame pacing "
"worse."),
"EmuCore/GS", "SkipDuplicateFrames", false);
DrawToggleSetting(bsi, FSUI_CSTR("Disable Threaded Presentation"),
FSUI_CSTR("Presents frames on the main GS thread instead of a worker thread. Used for debugging frametime issues."),
"EmuCore/GS", "DisableThreadedPresentation", false);
DrawToggleSetting(bsi, FSUI_CSTR("Disable Mailbox Presentation"),
FSUI_CSTR("Forces the use of FIFO over Mailbox presentation, i.e. double buffering instead of triple buffering. "
"Usually results in worse frame pacing."),
@ -7108,8 +7105,6 @@ TRANSLATE_NOOP("FullscreenUI", "Applies a shader which replicates the visual eff
TRANSLATE_NOOP("FullscreenUI", "Advanced");
TRANSLATE_NOOP("FullscreenUI", "Skip Presenting Duplicate Frames");
TRANSLATE_NOOP("FullscreenUI", "Skips displaying frames that don't change in 25/30fps games. Can improve speed, but increase input lag/make frame pacing worse.");
TRANSLATE_NOOP("FullscreenUI", "Disable Threaded Presentation");
TRANSLATE_NOOP("FullscreenUI", "Presents frames on the main GS thread instead of a worker thread. Used for debugging frametime issues.");
TRANSLATE_NOOP("FullscreenUI", "Disable Mailbox Presentation");
TRANSLATE_NOOP("FullscreenUI", "Forces the use of FIFO over Mailbox presentation, i.e. double buffering instead of triple buffering. Usually results in worse frame pacing.");
TRANSLATE_NOOP("FullscreenUI", "Hardware Download Mode");

View File

@ -621,7 +621,6 @@ Pcsx2Config::GSOptions::GSOptions()
DisableShaderCache = false;
DisableFramebufferFetch = false;
DisableVertexShaderExpand = false;
DisableThreadedPresentation = false;
SkipDuplicateFrames = false;
OsdShowMessages = true;
OsdShowSpeed = false;
@ -781,7 +780,6 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons
OpEqu(DisableShaderCache) &&
OpEqu(DisableFramebufferFetch) &&
OpEqu(DisableVertexShaderExpand) &&
OpEqu(DisableThreadedPresentation) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(ExclusiveFullscreenControl);
}
@ -825,7 +823,6 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
SettingsWrapBitBool(DisableShaderCache);
SettingsWrapBitBool(DisableFramebufferFetch);
SettingsWrapBitBool(DisableVertexShaderExpand);
SettingsWrapBitBool(DisableThreadedPresentation);
SettingsWrapBitBool(SkipDuplicateFrames);
SettingsWrapBitBool(OsdShowMessages);
SettingsWrapBitBool(OsdShowSpeed);