diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 4e05bacb98..3b2606b902 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2569,7 +2569,7 @@ namespace rsx if (!result.queries.empty()) { cond_render_ctrl.set_eval_sources(result.queries); - sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front()); + sync_hint(FIFO_hint::hint_conditional_render_eval, reinterpret_cast(ref)); } else { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 94be2f1e03..bfd2c7c961 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -356,6 +356,8 @@ namespace rsx struct frame_statistics_t { u32 draw_calls; + u32 submit_count; + s64 setup_time; s64 vertex_upload_time; s64 textures_upload_time; @@ -434,7 +436,6 @@ namespace rsx s32 m_skip_frame_ctr = 0; bool skip_current_frame = false; - frame_statistics_t stats{}; backend_configuration backend_config{}; @@ -793,6 +794,9 @@ namespace rsx // Get RSX approximate load in % u32 get_load(); + // Get stats object + frame_statistics_t& get_stats() { return m_frame_stats; } + // Returns true if the current thread is the active RSX thread inline bool is_current_thread() const { diff --git a/rpcs3/Emu/RSX/RSXZCULL.cpp b/rpcs3/Emu/RSX/RSXZCULL.cpp index 4d00248714..f32f5c0946 100644 --- a/rpcs3/Emu/RSX/RSXZCULL.cpp +++ b/rpcs3/Emu/RSX/RSXZCULL.cpp @@ -3,18 +3,6 @@ namespace rsx { - static inline std::string_view location_tostring(u32 location) - { - ensure(location < 2); - const char* location_names[] = {"CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN"}; - return location_names[location]; - } - - static inline u32 classify_location(u32 address) - { - return (address >= rsx::constants::local_mem_base) ? 
CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; - } - namespace reports { ZCULL_control::ZCULL_control() @@ -783,7 +771,7 @@ namespace rsx void ZCULL_control::on_report_enqueued(vm::addr_t address) { - const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; + const auto location = rsx::classify_location(address); std::scoped_lock lock(m_pages_mutex); if (!m_pages_accessed[location]) [[ likely ]] @@ -806,7 +794,7 @@ namespace rsx void ZCULL_control::on_report_completed(vm::addr_t address) { - const auto location = (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; + const auto location = rsx::classify_location(address); if (!m_pages_accessed[location]) { const auto page_address = static_cast(address) & ~0xfff; @@ -820,7 +808,7 @@ namespace rsx ensure(page.has_refs()); page.release(); - if (!page.has_refs()) + if (!page.has_refs() && location != CELL_GCM_LOCATION_LOCAL) { if (page.prot != utils::protection::rw) { diff --git a/rpcs3/Emu/RSX/RSXZCULL.h b/rpcs3/Emu/RSX/RSXZCULL.h index c6841af200..7426542cc2 100644 --- a/rpcs3/Emu/RSX/RSXZCULL.h +++ b/rpcs3/Emu/RSX/RSXZCULL.h @@ -14,6 +14,18 @@ namespace rsx { class thread; + static inline std::string_view location_tostring(u32 location) + { + ensure(location < 2); + const char* location_names[] = { "CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN" }; + return location_names[location]; + } + + static inline u32 classify_location(u32 address) + { + return (address >= rsx::constants::local_mem_base) ? 
CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; + } + namespace reports { struct occlusion_query_info @@ -173,6 +185,9 @@ namespace rsx // Check paging issues bool on_access_violation(u32 address); + // Optimization check + bool is_query_result_urgent(u32 address) const { return m_pages_accessed[rsx::classify_location(address)]; } + // Backend methods (optional, will return everything as always visible by default) virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {} virtual void end_occlusion_query(occlusion_query_info* /*query*/) {} diff --git a/rpcs3/Emu/RSX/VK/VKCommandStream.cpp b/rpcs3/Emu/RSX/VK/VKCommandStream.cpp index d211a88c4d..9e2ae3f70c 100644 --- a/rpcs3/Emu/RSX/VK/VKCommandStream.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommandStream.cpp @@ -50,6 +50,8 @@ namespace vk void queue_submit(const queue_submit_t& submit_info, VkBool32 flush) { + rsx::get_current_renderer()->get_stats().submit_count++; + // Access to this method must be externally synchronized. // Offloader is guaranteed to never call this for async flushes. vk::descriptors::flush(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 077492b7d8..3155ea3e86 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1577,18 +1577,11 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) ensure(args); rsx::thread::sync_hint(hint, args); - // Occlusion queries not enabled, do nothing if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) + { + // Occlusion queries not enabled, do nothing return; - - // Check if the required report is synced to this CB - auto occlusion_info = static_cast(args); - auto& data = m_occlusion_map[occlusion_info->driver_handle]; - - // NOTE: Currently, a special condition exists where the indices can be empty even with active draw count. 
- // This is caused by async compiler and should be removed when ubershaders are added in - if (!data.is_current(m_current_command_buffer) || data.indices.empty()) - return; + } // Occlusion test result evaluation is coming up, avoid a hard sync switch (hint) @@ -1597,15 +1590,45 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) { // If a flush request is already enqueued, do nothing if (m_flush_requests.pending()) + { return; + } - // Schedule a sync on the next loop iteration - m_flush_requests.post(false); - m_flush_requests.remove_one(); + // If the result is not going to be read by CELL, do nothing + const auto ref_addr = reinterpret_cast(args); + if (!zcull_ctrl->is_query_result_urgent(ref_addr)) + { + // No effect on CELL behaviour, it will be faster to handle this in RSX code + return; + } + + // OK, cell will be accessing the results, probably. + // Try to avoid flush spam, it is more costly to flush the CB than it is to just upload the vertex data + // This is supposed to be an optimization after all. + const auto now = rsx::uclock(); + if ((now - m_last_cond_render_eval_hint) > 50) + { + // Schedule a sync on the next loop iteration + m_flush_requests.post(false); + m_flush_requests.remove_one(); + } + + m_last_cond_render_eval_hint = now; break; } case rsx::FIFO_hint::hint_zcull_sync: { + // Check if the required report is synced to this CB + auto occlusion_info = static_cast(args); + auto& data = m_occlusion_map[occlusion_info->driver_handle]; + + // NOTE: Currently, a special condition exists where the indices can be empty even with active draw count. 
+ // This is caused by async compiler and should be removed when ubershaders are added in + if (!data.is_current(m_current_command_buffer) || data.indices.empty()) + { + return; + } + // Unavoidable hard sync coming up, flush immediately // This heavyweight hint should be used with caution std::lock_guard lock(m_flush_queue_mutex); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 117516dd23..80dd9cdfdd 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -173,6 +173,8 @@ private: shared_mutex m_flush_queue_mutex; vk::flush_request_task m_flush_requests; + ullong m_last_cond_render_eval_hint = 0; + // Offloader thread deadlock recovery rsx::atomic_bitmask_t m_queue_status; utils::address_range m_offloader_fault_range; diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 938236f5da..32d7d92c4e 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -747,6 +747,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) if (g_cfg.video.overlay) { + // TODO: Move this to native overlay! 
It is both faster and easier to manage if (!m_text_writer) { auto key = vk::get_renderpass_key(m_swapchain->get_surface_format()); @@ -756,13 +757,14 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) m_text_writer->set_scale(m_frame->client_device_pixel_ratio()); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load())); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time)); - m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load())); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 36, direct_fbo->width(), 
direct_fbo->height(), fmt::format("submits: %20d", info.stats.submit_count)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time)); + m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 4, 126, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time)); const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count(); const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);