rsx/vk: Redesign how conditional rendering hints work

- Pass a sync address to the backend
- Ignore the hint if the query is running in lazy mode
- Do not submit CBs too close to each other. Submits are expensive
This commit is contained in:
kd-11 2022-05-14 17:43:43 +03:00 committed by kd-11
parent 0244c4046e
commit 7fa521a046
8 changed files with 73 additions and 37 deletions

View File

@ -2569,7 +2569,7 @@ namespace rsx
if (!result.queries.empty())
{
cond_render_ctrl.set_eval_sources(result.queries);
sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front());
sync_hint(FIFO_hint::hint_conditional_render_eval, reinterpret_cast<void*>(ref));
}
else
{

View File

@ -356,6 +356,8 @@ namespace rsx
struct frame_statistics_t
{
u32 draw_calls;
u32 submit_count;
s64 setup_time;
s64 vertex_upload_time;
s64 textures_upload_time;
@ -434,7 +436,6 @@ namespace rsx
s32 m_skip_frame_ctr = 0;
bool skip_current_frame = false;
frame_statistics_t stats{};
backend_configuration backend_config{};
@ -793,6 +794,9 @@ namespace rsx
// Get RSX approximate load in %
u32 get_load();
// Accessor for the frame statistics object (returned by mutable reference)
frame_statistics_t& get_stats()
{
	return m_frame_stats;
}
// Returns true if the current thread is the active RSX thread
inline bool is_current_thread() const
{

View File

@ -3,18 +3,6 @@
namespace rsx
{
// Returns the symbolic name for a location index.
// Index 0 yields "CELL_GCM_LOCATION_LOCAL" and index 1 yields "CELL_GCM_LOCATION_MAIN".
// The index is checked with ensure() to be below 2 before it is used.
static inline std::string_view location_tostring(u32 location)
{
	ensure(location < 2);

	if (location == 0)
	{
		return "CELL_GCM_LOCATION_LOCAL";
	}

	return "CELL_GCM_LOCATION_MAIN";
}
// Classifies an address by comparing it against rsx::constants::local_mem_base.
// Addresses at or above the base map to CELL_GCM_LOCATION_LOCAL, all others to CELL_GCM_LOCATION_MAIN.
static inline u32 classify_location(u32 address)
{
	if (address < rsx::constants::local_mem_base)
	{
		return CELL_GCM_LOCATION_MAIN;
	}

	return CELL_GCM_LOCATION_LOCAL;
}
namespace reports
{
ZCULL_control::ZCULL_control()
@ -783,7 +771,7 @@ namespace rsx
void ZCULL_control::on_report_enqueued(vm::addr_t address)
{
const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
const auto location = rsx::classify_location(address);
std::scoped_lock lock(m_pages_mutex);
if (!m_pages_accessed[location]) [[ likely ]]
@ -806,7 +794,7 @@ namespace rsx
void ZCULL_control::on_report_completed(vm::addr_t address)
{
const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN;
const auto location = rsx::classify_location(address);
if (!m_pages_accessed[location])
{
const auto page_address = static_cast<u32>(address) & ~0xfff;
@ -820,7 +808,7 @@ namespace rsx
ensure(page.has_refs());
page.release();
if (!page.has_refs())
if (!page.has_refs() && location != CELL_GCM_LOCATION_LOCAL)
{
if (page.prot != utils::protection::rw)
{

View File

@ -14,6 +14,18 @@ namespace rsx
{
class thread;
// Translates a location index into its symbolic name.
// 0 -> "CELL_GCM_LOCATION_LOCAL", 1 -> "CELL_GCM_LOCATION_MAIN".
// ensure() verifies that the index is below 2 before the lookup.
static inline std::string_view location_tostring(u32 location)
{
	ensure(location < 2);

	if (location == 0)
	{
		return "CELL_GCM_LOCATION_LOCAL";
	}

	return "CELL_GCM_LOCATION_MAIN";
}
// Decides which location an address belongs to, using rsx::constants::local_mem_base as the boundary.
// Addresses below the boundary are CELL_GCM_LOCATION_MAIN; the boundary and everything above it are CELL_GCM_LOCATION_LOCAL.
static inline u32 classify_location(u32 address)
{
	if (address < rsx::constants::local_mem_base)
	{
		return CELL_GCM_LOCATION_MAIN;
	}

	return CELL_GCM_LOCATION_LOCAL;
}
namespace reports
{
struct occlusion_query_info
@ -173,6 +185,9 @@ namespace rsx
// Check paging issues
bool on_access_violation(u32 address);
// Optimization check: returns the m_pages_accessed entry for the location that 'address' classifies into
bool is_query_result_urgent(u32 address) const
{
	const auto location = rsx::classify_location(address);
	return m_pages_accessed[location];
}
// Backend methods (optional, will return everything as always visible by default)
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}

View File

@ -50,6 +50,8 @@ namespace vk
void queue_submit(const queue_submit_t& submit_info, VkBool32 flush)
{
rsx::get_current_renderer()->get_stats().submit_count++;
// Access to this method must be externally synchronized.
// Offloader is guaranteed to never call this for async flushes.
vk::descriptors::flush();

View File

@ -1577,18 +1577,11 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
ensure(args);
rsx::thread::sync_hint(hint, args);
// Occlusion queries not enabled, do nothing
if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task))
{
// Occlusion queries not enabled, do nothing
return;
// Check if the required report is synced to this CB
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
auto& data = m_occlusion_map[occlusion_info->driver_handle];
// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
// This is caused by async compiler and should be removed when ubershaders are added in
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
return;
}
// Occlusion test result evaluation is coming up, avoid a hard sync
switch (hint)
@ -1597,15 +1590,45 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
{
// If a flush request is already enqueued, do nothing
if (m_flush_requests.pending())
{
return;
}
// Schedule a sync on the next loop iteration
m_flush_requests.post(false);
m_flush_requests.remove_one();
// If the result is not going to be read by CELL, do nothing
const auto ref_addr = reinterpret_cast<u32>(args);
if (!zcull_ctrl->is_query_result_urgent(ref_addr))
{
// No effect on CELL behaviour, it will be faster to handle this in RSX code
return;
}
// OK, cell will be accessing the results, probably.
// Try to avoid flush spam, it is more costly to flush the CB than it is to just upload the vertex data
// This is supposed to be an optimization after all.
const auto now = rsx::uclock();
if ((now - m_last_cond_render_eval_hint) > 50)
{
// Schedule a sync on the next loop iteration
m_flush_requests.post(false);
m_flush_requests.remove_one();
}
m_last_cond_render_eval_hint = now;
break;
}
case rsx::FIFO_hint::hint_zcull_sync:
{
// Check if the required report is synced to this CB
auto occlusion_info = static_cast<rsx::reports::occlusion_query_info*>(args);
auto& data = m_occlusion_map[occlusion_info->driver_handle];
// NOTE: Currently, a special condition exists where the indices can be empty even with active draw count.
// This is caused by async compiler and should be removed when ubershaders are added in
if (!data.is_current(m_current_command_buffer) || data.indices.empty())
{
return;
}
// Unavoidable hard sync coming up, flush immediately
// This heavyweight hint should be used with caution
std::lock_guard lock(m_flush_queue_mutex);

View File

@ -173,6 +173,8 @@ private:
shared_mutex m_flush_queue_mutex;
vk::flush_request_task m_flush_requests;
ullong m_last_cond_render_eval_hint = 0;
// Offloader thread deadlock recovery
rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
utils::address_range m_offloader_fault_range;

View File

@ -747,6 +747,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
if (g_cfg.video.overlay)
{
// TODO: Move this to native overlay! It is both faster and easier to manage
if (!m_text_writer)
{
auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
@ -756,13 +757,14 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
m_text_writer->set_scale(m_frame->client_device_pixel_ratio());
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("submits: %20d", info.stats.submit_count));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 126, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);