// Copyright 2016 Dolphin Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoBackends/Vulkan/VKPerfQuery.h" #include #include #include #include "Common/Assert.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/VKGfx.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/VideoCommon.h" namespace Vulkan { PerfQuery::PerfQuery() = default; PerfQuery::~PerfQuery() { if (m_query_pool != VK_NULL_HANDLE) vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr); } bool PerfQuery::Initialize() { if (!CreateQueryPool()) { PanicAlertFmt("Failed to create query pool"); return false; } // Vulkan requires query pools to be reset after creation ResetQuery(); return true; } void PerfQuery::EnableQuery(PerfQueryGroup group) { // Block if there are no free slots. // Otherwise, try to keep half of them available. const u32 query_count = m_query_count.load(std::memory_order_relaxed); if (query_count > m_query_buffer.size() / 2) PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE); // Ensure command buffer is ready to go before beginning the query, that way we don't submit // a buffer with open queries. StateTracker::GetInstance()->Bind(); if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { ActiveQuery& entry = m_query_buffer[m_query_next_pos]; DEBUG_ASSERT(!entry.has_value); entry.has_value = true; entry.query_group = group; // Use precise queries if supported, otherwise boolean (which will be incorrect). VkQueryControlFlags flags = g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0; // Ensure the query starts within a render pass. StateTracker::GetInstance()->BeginRenderPass(); vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos, flags); } } void PerfQuery::DisableQuery(PerfQueryGroup group) { if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos); ActiveQuery& entry = m_query_buffer[m_query_next_pos]; entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE; m_query_count.fetch_add(1, std::memory_order_relaxed); } } void PerfQuery::ResetQuery() { m_query_count.store(0, std::memory_order_relaxed); m_query_readback_pos = 0; m_query_next_pos = 0; for (size_t i = 0; i < m_results.size(); ++i) m_results[i].store(0, std::memory_order_relaxed); // Reset entire query pool, ensuring all queries are ready to write to. StateTracker::GetInstance()->EndRenderPass(); vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0, PERF_QUERY_BUFFER_SIZE); std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size()); } u32 PerfQuery::GetQueryResult(PerfQueryType type) { u32 result = 0; if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) { result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); } else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) { result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed); } else if (type == PQ_BLEND_INPUT) { result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) + m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); } else if (type == PQ_EFB_COPY_CLOCKS) { result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed); } return result / 4; } void PerfQuery::FlushResults() { if (!IsFlushed()) PartialFlush(true); ASSERT(IsFlushed()); } bool PerfQuery::IsFlushed() const { return m_query_count.load(std::memory_order_relaxed) == 0; } bool PerfQuery::CreateQueryPool() { VkQueryPoolCreateInfo info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType nullptr, // const void* pNext 0, // VkQueryPoolCreateFlags flags VK_QUERY_TYPE_OCCLUSION, // VkQueryType queryType PERF_QUERY_BUFFER_SIZE, // uint32_t queryCount 0 // VkQueryPipelineStatisticFlags pipelineStatistics; }; VkResult res = vkCreateQueryPool(g_vulkan_context->GetDevice(), &info, nullptr, &m_query_pool); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); return false; } return true; } void PerfQuery::ReadbackQueries() { const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter(); // Need to save these since ProcessResults will modify them. const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed); u32 readback_count = 0; for (u32 i = 0; i < outstanding_queries; i++) { u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE; const ActiveQuery& entry = m_query_buffer[index]; if (entry.fence_counter > completed_fence_counter) break; // If this wrapped around, we need to flush the entries before the end of the buffer. if (index < m_query_readback_pos) { ReadbackQueries(readback_count); DEBUG_ASSERT(m_query_readback_pos == 0); readback_count = 0; } readback_count++; } if (readback_count > 0) ReadbackQueries(readback_count); } void PerfQuery::ReadbackQueries(u32 query_count) { // Should be at maximum query_count queries pending. ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) && (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE); // Read back from the GPU. VkResult res = vkGetQueryPoolResults( g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos, query_count, query_count * sizeof(PerfQueryDataType), m_query_result_buffer.data(), sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT); if (res != VK_SUCCESS) LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); StateTracker::GetInstance()->EndRenderPass(); vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_readback_pos, query_count); // Remove pending queries. for (u32 i = 0; i < query_count; i++) { u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE; ActiveQuery& entry = m_query_buffer[index]; // Should have a fence associated with it (waiting for a result). DEBUG_ASSERT(entry.fence_counter != 0); entry.fence_counter = 0; entry.has_value = false; // NOTE: Reported pixel metrics should be referenced to native resolution const u64 native_res_result = static_cast(m_query_result_buffer[i]) * EFB_WIDTH / g_framebuffer_manager->GetEFBWidth() * EFB_HEIGHT / g_framebuffer_manager->GetEFBHeight(); m_results[entry.query_group].fetch_add(static_cast(native_res_result), std::memory_order_relaxed); } m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE; m_query_count.fetch_sub(query_count, std::memory_order_relaxed); } void PerfQuery::PartialFlush(bool blocking) { // Submit a command buffer in the background if the front query is not bound to one. if (blocking || m_query_buffer[m_query_readback_pos].fence_counter == g_command_buffer_mgr->GetCurrentFenceCounter()) { VKGfx::GetInstance()->ExecuteCommandBuffer(true, blocking); } ReadbackQueries(); } } // namespace Vulkan