mirror of https://github.com/RPCS3/rpcs3.git
rsx: Fix offloader deadlock
- Do not allow offloader to handle its own faults. Serialize them on RSX instead. This approach introduces a GPU race condition that should be avoided with improved synchronization. - TODO: Use proper GPU-side synchronization to avoid this situation
This commit is contained in:
parent
b70908c8f3
commit
9d981de96d
|
@ -895,7 +895,6 @@ void GLGSRender::on_init_thread()
|
|||
m_video_output_pass.create();
|
||||
|
||||
m_gl_texture_cache.initialize();
|
||||
m_thread_id = std::this_thread::get_id();
|
||||
|
||||
if (!supports_native_ui)
|
||||
{
|
||||
|
@ -1821,7 +1820,7 @@ void GLGSRender::flip(int buffer, bool emu_flip)
|
|||
|
||||
bool GLGSRender::on_access_violation(u32 address, bool is_writing)
|
||||
{
|
||||
const bool can_flush = (std::this_thread::get_id() == m_thread_id);
|
||||
const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
|
||||
const rsx::invalidation_cause cause =
|
||||
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
|
||||
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
|
||||
|
@ -1848,14 +1847,13 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
return true;
|
||||
}
|
||||
|
||||
void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
|
||||
void GLGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
|
||||
{
|
||||
//Discard all memory in that range without bothering with writeback (Force it for strict?)
|
||||
gl::command_context cmd{ gl_state };
|
||||
auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::unmap));
|
||||
auto data = std::move(m_gl_texture_cache.invalidate_range(cmd, range, cause));
|
||||
AUDIT(data.empty());
|
||||
|
||||
if (data.violation_handled)
|
||||
if (cause == rsx::invalidation_cause::unmap && data.violation_handled)
|
||||
{
|
||||
m_gl_texture_cache.purge_unreleased_sections();
|
||||
{
|
||||
|
@ -1865,6 +1863,14 @@ void GLGSRender::on_invalidate_memory_range(const utils::address_range &range)
|
|||
}
|
||||
}
|
||||
|
||||
void GLGSRender::on_semaphore_acquire_wait()
|
||||
{
|
||||
if (!work_queue.empty())
|
||||
{
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
}
|
||||
}
|
||||
|
||||
void GLGSRender::do_local_task(rsx::FIFO_state state)
|
||||
{
|
||||
if (!work_queue.empty())
|
||||
|
|
|
@ -327,8 +327,6 @@ private:
|
|||
shared_mutex queue_guard;
|
||||
std::list<work_item> work_queue;
|
||||
|
||||
std::thread::id m_thread_id;
|
||||
|
||||
GLProgramBuffer m_prog_buffer;
|
||||
draw_context_t m_decompiler_context;
|
||||
|
||||
|
@ -397,8 +395,9 @@ protected:
|
|||
void do_local_task(rsx::FIFO_state state) override;
|
||||
|
||||
bool on_access_violation(u32 address, bool is_writing) override;
|
||||
void on_invalidate_memory_range(const utils::address_range &range) override;
|
||||
void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
|
||||
void notify_tile_unbound(u32 tile) override;
|
||||
void on_semaphore_acquire_wait() override;
|
||||
|
||||
std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;
|
||||
std::array<std::vector<gsl::byte>, 2> copy_depth_stencil_buffer_to_memory() override;
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#include "Common/BufferUtils.h"
|
||||
#include "Emu/System.h"
|
||||
#include "RSXOffload.h"
|
||||
#include "RSXThread.h"
|
||||
#include "rsx_utils.h"
|
||||
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
|
@ -27,6 +29,9 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
// Register thread id
|
||||
m_thread_id = std::this_thread::get_id();
|
||||
|
||||
if (g_cfg.core.thread_scheduler_enabled)
|
||||
{
|
||||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
|
||||
|
@ -36,22 +41,21 @@ namespace rsx
|
|||
{
|
||||
if (m_enqueued_count.load() != m_processed_count)
|
||||
{
|
||||
for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
|
||||
for (m_current_job = m_work_queue.pop_all(); m_current_job; m_current_job.pop_front())
|
||||
{
|
||||
auto task = *slice;
|
||||
switch (task.type)
|
||||
switch (m_current_job->type)
|
||||
{
|
||||
case raw_copy:
|
||||
memcpy(task.dst, task.src, task.length);
|
||||
memcpy(m_current_job->dst, m_current_job->src, m_current_job->length);
|
||||
break;
|
||||
case vector_copy:
|
||||
memcpy(task.dst, task.opt_storage.data(), task.length);
|
||||
memcpy(m_current_job->dst, m_current_job->opt_storage.data(), m_current_job->length);
|
||||
break;
|
||||
case index_emulate:
|
||||
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
|
||||
reinterpret_cast<char*>(task.dst),
|
||||
static_cast<rsx::primitive_type>(task.aux_param0),
|
||||
task.length);
|
||||
reinterpret_cast<char*>(m_current_job->dst),
|
||||
static_cast<rsx::primitive_type>(m_current_job->aux_param0),
|
||||
m_current_job->length);
|
||||
break;
|
||||
default:
|
||||
ASSUME(0);
|
||||
|
@ -116,6 +120,11 @@ namespace rsx
|
|||
}
|
||||
|
||||
// Synchronization
|
||||
bool dma_manager::is_current_thread() const
|
||||
{
|
||||
return (std::this_thread::get_id() == m_thread_id);
|
||||
}
|
||||
|
||||
void dma_manager::sync()
|
||||
{
|
||||
if (LIKELY(m_enqueued_count.load() == m_processed_count))
|
||||
|
@ -124,8 +133,25 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
while (m_enqueued_count.load() != m_processed_count)
|
||||
_mm_pause();
|
||||
if (auto rsxthr = get_current_renderer(); rsxthr->is_current_thread())
|
||||
{
|
||||
if (m_mem_fault_flag)
|
||||
{
|
||||
// Abort if offloader is in recovery mode
|
||||
return;
|
||||
}
|
||||
|
||||
while (m_enqueued_count.load() != m_processed_count)
|
||||
{
|
||||
rsxthr->on_semaphore_acquire_wait();
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (m_enqueued_count.load() != m_processed_count)
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
void dma_manager::join()
|
||||
|
@ -133,4 +159,50 @@ namespace rsx
|
|||
m_worker_state = thread_state::finished;
|
||||
sync();
|
||||
}
|
||||
|
||||
void dma_manager::set_mem_fault_flag()
|
||||
{
|
||||
verify("Access denied" HERE), is_current_thread();
|
||||
m_mem_fault_flag.release(true);
|
||||
}
|
||||
|
||||
void dma_manager::clear_mem_fault_flag()
|
||||
{
|
||||
verify("Access denied" HERE), is_current_thread();
|
||||
m_mem_fault_flag.release(false);
|
||||
}
|
||||
|
||||
// Fault recovery
|
||||
utils::address_range dma_manager::get_fault_range(bool writing) const
|
||||
{
|
||||
verify(HERE), m_current_job;
|
||||
|
||||
void *address = nullptr;
|
||||
u32 range = m_current_job->length;
|
||||
|
||||
switch (m_current_job->type)
|
||||
{
|
||||
case raw_copy:
|
||||
address = (writing) ? m_current_job->dst : m_current_job->src;
|
||||
break;
|
||||
case vector_copy:
|
||||
verify(HERE), writing;
|
||||
address = m_current_job->dst;
|
||||
break;
|
||||
case index_emulate:
|
||||
verify(HERE), writing;
|
||||
address = m_current_job->dst;
|
||||
range = get_index_count(static_cast<rsx::primitive_type>(m_current_job->aux_param0), m_current_job->length);
|
||||
break;
|
||||
default:
|
||||
ASSUME(0);
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
|
||||
const uintptr_t addr = uintptr_t(address);
|
||||
const uintptr_t base = uintptr_t(vm::g_base_addr);
|
||||
|
||||
verify(HERE), addr > base;
|
||||
return utils::address_range::start_length(u32(addr - base), range);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,9 +3,11 @@
|
|||
#include "Utilities/types.h"
|
||||
#include "Utilities/lockless.h"
|
||||
#include "Utilities/Thread.h"
|
||||
#include "Utilities/address_range.h"
|
||||
#include "gcm_enums.h"
|
||||
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
|
@ -42,9 +44,12 @@ namespace rsx
|
|||
};
|
||||
|
||||
lf_queue<transport_packet> m_work_queue;
|
||||
lf_queue_slice<transport_packet> m_current_job;
|
||||
atomic_t<u64> m_enqueued_count{ 0 };
|
||||
volatile u64 m_processed_count = 0;
|
||||
thread_state m_worker_state = thread_state::detached;
|
||||
std::thread::id m_thread_id;
|
||||
atomic_t<bool> m_mem_fault_flag{ false };
|
||||
|
||||
// TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes
|
||||
const u32 max_immediate_transfer_size = 3584;
|
||||
|
@ -63,8 +68,14 @@ namespace rsx
|
|||
void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count);
|
||||
|
||||
// Synchronization
|
||||
bool is_current_thread() const;
|
||||
void sync();
|
||||
void join();
|
||||
void set_mem_fault_flag();
|
||||
void clear_mem_fault_flag();
|
||||
|
||||
// Fault recovery
|
||||
utils::address_range get_fault_range(bool writing) const;
|
||||
};
|
||||
|
||||
extern dma_manager g_dma_manager;
|
||||
|
|
|
@ -917,27 +917,6 @@ namespace rsx
|
|||
fmt::throw_exception("ill-formed draw command" HERE);
|
||||
}
|
||||
|
||||
void thread::do_internal_task()
|
||||
{
|
||||
if (m_internal_tasks.empty())
|
||||
{
|
||||
std::this_thread::yield();
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::throw_exception("Disabled" HERE);
|
||||
//std::lock_guard lock(m_mtx_task);
|
||||
|
||||
//internal_task_entry &front = m_internal_tasks.front();
|
||||
|
||||
//if (front.callback())
|
||||
//{
|
||||
// front.promise.set_value();
|
||||
// m_internal_tasks.pop_front();
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
void thread::do_local_task(FIFO_state state)
|
||||
{
|
||||
if (async_flip_requested & flip_request::emu_requested)
|
||||
|
@ -2465,7 +2444,7 @@ namespace rsx
|
|||
if (!m_invalidated_memory_range.valid())
|
||||
return;
|
||||
|
||||
on_invalidate_memory_range(m_invalidated_memory_range);
|
||||
on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::unmap);
|
||||
m_invalidated_memory_range.invalidate();
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "rsx_methods.h"
|
||||
#include "rsx_utils.h"
|
||||
#include "Overlays/overlays.h"
|
||||
#include "Common/texture_cache_utils.h"
|
||||
|
||||
#include "Utilities/Thread.h"
|
||||
#include "Utilities/geometry.h"
|
||||
|
@ -418,8 +419,8 @@ namespace rsx
|
|||
|
||||
protected:
|
||||
std::thread::id m_rsx_thread;
|
||||
atomic_t<bool> m_rsx_thread_exiting{true};
|
||||
s32 m_return_addr{-1}, restore_ret{-1};
|
||||
atomic_t<bool> m_rsx_thread_exiting{ true };
|
||||
|
||||
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
|
||||
std::vector<u32> element_push_buffer;
|
||||
|
||||
|
@ -433,6 +434,7 @@ namespace rsx
|
|||
// FIFO
|
||||
std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;
|
||||
FIFO::flattening_helper m_flattener;
|
||||
s32 m_return_addr{ -1 }, restore_ret{ -1 };
|
||||
|
||||
// Occlusion query
|
||||
bool zcull_surface_active = false;
|
||||
|
@ -605,7 +607,7 @@ namespace rsx
|
|||
virtual void flip(int buffer, bool emu_flip = false) = 0;
|
||||
virtual u64 timestamp();
|
||||
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
|
||||
virtual void on_invalidate_memory_range(const address_range & /*range*/) {}
|
||||
virtual void on_invalidate_memory_range(const address_range & /*range*/, rsx::invalidation_cause) {}
|
||||
virtual void notify_tile_unbound(u32 /*tile*/) {}
|
||||
|
||||
// zcull
|
||||
|
@ -661,18 +663,6 @@ namespace rsx
|
|||
private:
|
||||
shared_mutex m_mtx_task;
|
||||
|
||||
struct internal_task_entry
|
||||
{
|
||||
std::function<bool()> callback;
|
||||
//std::promise<void> promise;
|
||||
|
||||
internal_task_entry(std::function<bool()> callback) : callback(std::move(callback))
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
std::deque<internal_task_entry> m_internal_tasks;
|
||||
void do_internal_task();
|
||||
void handle_emu_flip(u32 buffer);
|
||||
void handle_invalidated_memory_range();
|
||||
|
||||
|
@ -732,7 +722,7 @@ namespace rsx
|
|||
/**
|
||||
* Notify to check internal state during semaphore wait
|
||||
*/
|
||||
void on_semaphore_acquire_wait() { do_local_task(FIFO_state::lock_wait); }
|
||||
virtual void on_semaphore_acquire_wait() {}
|
||||
|
||||
/**
|
||||
* Copy rtt values to buffer.
|
||||
|
@ -767,7 +757,10 @@ namespace rsx
|
|||
void pause();
|
||||
void unpause();
|
||||
|
||||
//Get RSX approximate load in %
|
||||
// Get RSX approximate load in %
|
||||
u32 get_load();
|
||||
|
||||
// Returns true if the current thread is the active RSX thread
|
||||
bool is_current_thread() const { return std::this_thread::get_id() == m_rsx_thread; }
|
||||
};
|
||||
}
|
||||
|
|
|
@ -662,10 +662,29 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
|
||||
if (result.num_flushable > 0)
|
||||
{
|
||||
const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread;
|
||||
bool has_queue_ref = false;
|
||||
if (rsx::g_dma_manager.is_current_thread())
|
||||
{
|
||||
// The offloader thread cannot handle flush requests
|
||||
verify(HERE), m_queue_status.load() == flush_queue_state::ok;
|
||||
|
||||
if (!is_rsxthr)
|
||||
m_offloader_fault_range = rsx::g_dma_manager.get_fault_range(is_writing);
|
||||
m_offloader_fault_cause = (is_writing) ? rsx::invalidation_cause::write : rsx::invalidation_cause::read;
|
||||
|
||||
rsx::g_dma_manager.set_mem_fault_flag();
|
||||
m_queue_status |= flush_queue_state::deadlock;
|
||||
|
||||
// Wait for deadlock to clear
|
||||
while (m_queue_status & flush_queue_state::deadlock)
|
||||
{
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
rsx::g_dma_manager.clear_mem_fault_flag();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool has_queue_ref = false;
|
||||
if (!is_current_thread())
|
||||
{
|
||||
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
|
||||
vm::temporary_unlock();
|
||||
|
@ -703,14 +722,14 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
|||
return true;
|
||||
}
|
||||
|
||||
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
|
||||
void VKGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
|
||||
{
|
||||
std::lock_guard lock(m_secondary_cb_guard);
|
||||
|
||||
auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
|
||||
auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, cause));
|
||||
AUDIT(data.empty());
|
||||
|
||||
if (data.violation_handled)
|
||||
if (cause == rsx::invalidation_cause::unmap && data.violation_handled)
|
||||
{
|
||||
m_texture_cache.purge_unreleased_sections();
|
||||
{
|
||||
|
@ -720,6 +739,14 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
|
|||
}
|
||||
}
|
||||
|
||||
void VKGSRender::on_semaphore_acquire_wait()
|
||||
{
|
||||
if (m_flush_requests.pending() || m_queue_status & flush_queue_state::deadlock)
|
||||
{
|
||||
do_local_task(rsx::FIFO_state::lock_wait);
|
||||
}
|
||||
}
|
||||
|
||||
void VKGSRender::notify_tile_unbound(u32 tile)
|
||||
{
|
||||
//TODO: Handle texture writeback
|
||||
|
@ -2326,16 +2353,28 @@ void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources
|
|||
|
||||
void VKGSRender::do_local_task(rsx::FIFO_state state)
|
||||
{
|
||||
if (m_queue_status & flush_queue_state::deadlock)
|
||||
{
|
||||
// Clear offloader deadlock
|
||||
// NOTE: It is not possible to handle regular flush requests before this is cleared
|
||||
// NOTE: This may cause graphics corruption due to unsynchronized modification
|
||||
flush_command_queue();
|
||||
on_invalidate_memory_range(m_offloader_fault_range, m_offloader_fault_cause);
|
||||
m_queue_status.clear(flush_queue_state::deadlock);
|
||||
}
|
||||
|
||||
if (m_flush_requests.pending())
|
||||
{
|
||||
std::lock_guard lock(m_flush_queue_mutex);
|
||||
if (m_flush_queue_mutex.try_lock())
|
||||
{
|
||||
// TODO: Determine if a hard sync is necessary
|
||||
// Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
|
||||
flush_command_queue();
|
||||
|
||||
//TODO: Determine if a hard sync is necessary
|
||||
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
|
||||
flush_command_queue();
|
||||
|
||||
m_flush_requests.clear_pending_flag();
|
||||
m_flush_requests.consumer_wait();
|
||||
m_flush_requests.clear_pending_flag();
|
||||
m_flush_requests.consumer_wait();
|
||||
m_flush_queue_mutex.unlock();
|
||||
}
|
||||
}
|
||||
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
|
||||
{
|
||||
|
|
|
@ -294,6 +294,12 @@ struct flush_request_task
|
|||
}
|
||||
};
|
||||
|
||||
enum flush_queue_state : u32
|
||||
{
|
||||
ok = 0,
|
||||
deadlock = 1
|
||||
};
|
||||
|
||||
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
||||
{
|
||||
private:
|
||||
|
@ -404,6 +410,11 @@ private:
|
|||
shared_mutex m_flush_queue_mutex;
|
||||
flush_request_task m_flush_requests;
|
||||
|
||||
// Offloader thread deadlock recovery
|
||||
rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
|
||||
utils::address_range m_offloader_fault_range;
|
||||
rsx::invalidation_cause m_offloader_fault_cause;
|
||||
|
||||
bool m_render_pass_open = false;
|
||||
u64 m_current_renderpass_key = 0;
|
||||
VkRenderPass m_cached_renderpass = VK_NULL_HANDLE;
|
||||
|
@ -488,7 +499,8 @@ protected:
|
|||
void notify_tile_unbound(u32 tile) override;
|
||||
|
||||
bool on_access_violation(u32 address, bool is_writing) override;
|
||||
void on_invalidate_memory_range(const utils::address_range &range) override;
|
||||
void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;
|
||||
void on_semaphore_acquire_wait() override;
|
||||
|
||||
bool on_decompiler_task() override;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue