mirror of https://github.com/RPCS3/rpcs3.git
vk: Solve GPU hang/reset due to waiting on events that are never signaled
- TODO: Some refactoring may be required to pair the primary and secondary CB and avoid such blunders
This commit is contained in:
parent
3e8a00d264
commit
608f8de347
|
@ -30,6 +30,17 @@ namespace vk
|
||||||
{
|
{
|
||||||
for (auto&& job : m_event_queue.pop_all())
|
for (auto&& job : m_event_queue.pop_all())
|
||||||
{
|
{
|
||||||
|
if (job->type == xqueue_event_type::barrier)
|
||||||
|
{
|
||||||
|
// Blocks the queue from progressing until the work items are actually submitted to the GPU
|
||||||
|
// Avoids spamming the GPU with event requests when the events have not even been submitted yet
|
||||||
|
while (job->completion_eid == m_submit_count.load())
|
||||||
|
{
|
||||||
|
thread_ctrl::wait_for(100);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
vk::wait_for_event(job->queue1_signal.get(), GENERAL_WAIT_TIMEOUT);
|
vk::wait_for_event(job->queue1_signal.get(), GENERAL_WAIT_TIMEOUT);
|
||||||
job->queue2_signal->host_signal();
|
job->queue2_signal->host_signal();
|
||||||
}
|
}
|
||||||
|
@ -62,7 +73,12 @@ namespace vk
|
||||||
{
|
{
|
||||||
auto ev1 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
auto ev1 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
||||||
auto ev2 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
auto ev2 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
|
||||||
m_events_pool.emplace_back(ev1, ev2, 0ull);
|
m_events_pool.emplace_back(ev1, ev2, 0ull, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (usz i = 0; i < VK_MAX_ASYNC_COMPUTE_QUEUES; ++i)
|
||||||
|
{
|
||||||
|
m_barriers_pool.emplace_back(0ull, 0xFFFF0000 + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,6 +96,7 @@ namespace vk
|
||||||
|
|
||||||
ensure(sync_label->completion_eid <= vk::last_completed_event_id());
|
ensure(sync_label->completion_eid <= vk::last_completed_event_id());
|
||||||
|
|
||||||
|
m_sync_label_debug_uid = sync_label->uid;
|
||||||
sync_label->queue1_signal->reset();
|
sync_label->queue1_signal->reset();
|
||||||
sync_label->queue2_signal->reset();
|
sync_label->queue2_signal->reset();
|
||||||
sync_label->completion_eid = vk::current_event_id();
|
sync_label->completion_eid = vk::current_event_id();
|
||||||
|
@ -143,6 +160,11 @@ namespace vk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 3. Insert a barrier for this CB. A job is about to be scheduled on it immediately.
|
||||||
|
auto barrier = &m_barriers_pool[m_next_cb_index];
|
||||||
|
barrier->completion_eid = m_submit_count;
|
||||||
|
m_event_queue.push(barrier);
|
||||||
|
|
||||||
m_next_cb_index++;
|
m_next_cb_index++;
|
||||||
return m_current_cb;
|
return m_current_cb;
|
||||||
}
|
}
|
||||||
|
@ -160,6 +182,11 @@ namespace vk
|
||||||
return std::exchange(m_sync_label, nullptr);
|
return std::exchange(m_sync_label, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 AsyncTaskScheduler::get_primary_sync_label_debug_uid()
|
||||||
|
{
|
||||||
|
return std::exchange(m_sync_label_debug_uid, ~0ull);
|
||||||
|
}
|
||||||
|
|
||||||
void AsyncTaskScheduler::flush(VkBool32 force_flush, VkSemaphore wait_semaphore, VkPipelineStageFlags wait_dst_stage_mask)
|
void AsyncTaskScheduler::flush(VkBool32 force_flush, VkSemaphore wait_semaphore, VkPipelineStageFlags wait_dst_stage_mask)
|
||||||
{
|
{
|
||||||
if (!m_current_cb)
|
if (!m_current_cb)
|
||||||
|
@ -176,6 +203,9 @@ namespace vk
|
||||||
m_current_cb->end();
|
m_current_cb->end();
|
||||||
m_current_cb->submit(get_current_renderer()->get_transfer_queue(), wait_semaphore, VK_NULL_HANDLE, nullptr, wait_dst_stage_mask, force_flush);
|
m_current_cb->submit(get_current_renderer()->get_transfer_queue(), wait_semaphore, VK_NULL_HANDLE, nullptr, wait_dst_stage_mask, force_flush);
|
||||||
|
|
||||||
|
m_submit_count++;
|
||||||
|
thread_ctrl::notify(g_fxo->get<async_scheduler_thread>());
|
||||||
|
|
||||||
m_last_used_cb = m_current_cb;
|
m_last_used_cb = m_current_cb;
|
||||||
m_current_cb = nullptr;
|
m_current_cb = nullptr;
|
||||||
m_sync_required = false;
|
m_sync_required = false;
|
||||||
|
|
|
@ -9,15 +9,32 @@
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
|
enum class xqueue_event_type
|
||||||
|
{
|
||||||
|
label,
|
||||||
|
barrier
|
||||||
|
};
|
||||||
|
|
||||||
struct xqueue_event
|
struct xqueue_event
|
||||||
{
|
{
|
||||||
|
// Type
|
||||||
|
xqueue_event_type type;
|
||||||
|
|
||||||
|
// Payload
|
||||||
std::unique_ptr<event> queue1_signal;
|
std::unique_ptr<event> queue1_signal;
|
||||||
std::unique_ptr<event> queue2_signal;
|
std::unique_ptr<event> queue2_signal;
|
||||||
u64 completion_eid;
|
|
||||||
|
|
||||||
xqueue_event(): completion_eid(0) {}
|
// Identifiers
|
||||||
xqueue_event(std::unique_ptr<event>& trigger, std::unique_ptr<event>& payload, u64 eid)
|
u64 completion_eid;
|
||||||
: queue1_signal(std::move(trigger)), queue2_signal(std::move(payload)), completion_eid(eid)
|
u64 uid;
|
||||||
|
|
||||||
|
xqueue_event(u64 eid, u64 _uid)
|
||||||
|
: type(xqueue_event_type::barrier), completion_eid(eid), uid(_uid)
|
||||||
|
{}
|
||||||
|
|
||||||
|
xqueue_event(std::unique_ptr<event>& trigger, std::unique_ptr<event>& payload, u64 eid, u64 _uid)
|
||||||
|
: type(xqueue_event_type::label), queue1_signal(std::move(trigger)), queue2_signal(std::move(payload)),
|
||||||
|
completion_eid(eid), uid(_uid)
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -31,6 +48,8 @@ namespace vk
|
||||||
command_buffer* m_last_used_cb = nullptr;
|
command_buffer* m_last_used_cb = nullptr;
|
||||||
command_buffer* m_current_cb = nullptr;
|
command_buffer* m_current_cb = nullptr;
|
||||||
usz m_next_cb_index = 0;
|
usz m_next_cb_index = 0;
|
||||||
|
std::vector<xqueue_event> m_barriers_pool;
|
||||||
|
atomic_t<u64> m_submit_count = 0;
|
||||||
|
|
||||||
// Scheduler
|
// Scheduler
|
||||||
shared_mutex m_config_mutex;
|
shared_mutex m_config_mutex;
|
||||||
|
@ -40,6 +59,7 @@ namespace vk
|
||||||
// Sync
|
// Sync
|
||||||
event* m_sync_label = nullptr;
|
event* m_sync_label = nullptr;
|
||||||
atomic_t<bool> m_sync_required = false;
|
atomic_t<bool> m_sync_required = false;
|
||||||
|
u64 m_sync_label_debug_uid = 0;
|
||||||
|
|
||||||
static constexpr u32 events_pool_size = 16384;
|
static constexpr u32 events_pool_size = 16384;
|
||||||
std::vector<xqueue_event> m_events_pool;
|
std::vector<xqueue_event> m_events_pool;
|
||||||
|
@ -58,6 +78,7 @@ namespace vk
|
||||||
|
|
||||||
command_buffer* get_current();
|
command_buffer* get_current();
|
||||||
event* get_primary_sync_label();
|
event* get_primary_sync_label();
|
||||||
|
u64 get_primary_sync_label_debug_uid();
|
||||||
|
|
||||||
void flush(VkBool32 force_flush, VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
void flush(VkBool32 force_flush, VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||||
void kill();
|
void kill();
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "VKAsyncScheduler.h"
|
||||||
|
#include "VKDMA.h"
|
||||||
#include "VKRenderTargets.h"
|
#include "VKRenderTargets.h"
|
||||||
#include "VKResourceManager.h"
|
#include "VKResourceManager.h"
|
||||||
#include "VKDMA.h"
|
|
||||||
#include "vkutils/image_helpers.h"
|
#include "vkutils/image_helpers.h"
|
||||||
|
|
||||||
#include "../Common/texture_cache.h"
|
#include "../Common/texture_cache.h"
|
||||||
|
@ -1062,6 +1063,10 @@ namespace vk
|
||||||
|
|
||||||
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
|
if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
|
||||||
{
|
{
|
||||||
|
// Flush any pending async jobs in case of blockers
|
||||||
|
// TODO: Context-level manager should handle this logic
|
||||||
|
g_fxo->get<async_scheduler_thread>().flush(VK_TRUE);
|
||||||
|
|
||||||
// Primary access command queue, must restart it after
|
// Primary access command queue, must restart it after
|
||||||
vk::fence submit_fence(*m_device);
|
vk::fence submit_fence(*m_device);
|
||||||
cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE);
|
cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE);
|
||||||
|
|
Loading…
Reference in New Issue