// Copyright 2012 Dolphin Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoBackends/OGL/OGLPerfQuery.h" #include #include "Common/CommonTypes.h" #include "Common/GL/GLExtensions/GLExtensions.h" #include "VideoBackends/OGL/OGLRender.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" namespace OGL { std::unique_ptr GetPerfQuery() { const bool is_gles = static_cast(g_renderer.get())->IsGLES(); if (is_gles && GLExtensions::Supports("GL_NV_occlusion_query_samples")) return std::make_unique(); else if (is_gles) return std::make_unique(GL_ANY_SAMPLES_PASSED); else return std::make_unique(GL_SAMPLES_PASSED); } PerfQuery::PerfQuery() : m_query_read_pos() { ResetQuery(); } void PerfQuery::EnableQuery(PerfQueryGroup group) { m_query->EnableQuery(group); } void PerfQuery::DisableQuery(PerfQueryGroup group) { m_query->DisableQuery(group); } bool PerfQuery::IsFlushed() const { return m_query_count.load(std::memory_order_relaxed) == 0; } // TODO: could selectively flush things, but I don't think that will do much void PerfQuery::FlushResults() { m_query->FlushResults(); } void PerfQuery::ResetQuery() { m_query_count.store(0, std::memory_order_relaxed); for (size_t i = 0; i < m_results.size(); ++i) m_results[i].store(0, std::memory_order_relaxed); } u32 PerfQuery::GetQueryResult(PerfQueryType type) { u32 result = 0; if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) { result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); } else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) { result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed); } else if (type == PQ_BLEND_INPUT) { result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) + m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); } else if (type == PQ_EFB_COPY_CLOCKS) { result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed); } return result; } // Implementations PerfQueryGL::PerfQueryGL(GLenum query_type) : m_query_type(query_type) { for (ActiveQuery& query : m_query_buffer) glGenQueries(1, &query.query_id); } PerfQueryGL::~PerfQueryGL() { for (ActiveQuery& query : m_query_buffer) glDeleteQueries(1, &query.query_id); } void PerfQueryGL::EnableQuery(PerfQueryGroup group) { u32 query_count = m_query_count.load(std::memory_order_relaxed); // Is this sane? if (query_count > m_query_buffer.size() / 2) { WeakFlush(); query_count = m_query_count.load(std::memory_order_relaxed); } if (m_query_buffer.size() == query_count) { FlushOne(); query_count = m_query_count.load(std::memory_order_relaxed); // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!"); } // start query if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()]; glBeginQuery(m_query_type, entry.query_id); entry.query_group = group; m_query_count.fetch_add(1, std::memory_order_relaxed); } } void PerfQueryGL::DisableQuery(PerfQueryGroup group) { // stop query if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { glEndQuery(m_query_type); } } void PerfQueryGL::WeakFlush() { while (!IsFlushed()) { auto& entry = m_query_buffer[m_query_read_pos]; GLuint result = GL_FALSE; glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); if (GL_TRUE == result) { FlushOne(); } else { break; } } } void PerfQueryGL::FlushOne() { auto& entry = m_query_buffer[m_query_read_pos]; GLuint result = 0; glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); // NOTE: Reported pixel metrics should be referenced to native resolution // TODO: Dropping the lower 2 bits from this count should be closer to actual // hardware behavior when drawing triangles. result = static_cast(result) * EFB_WIDTH * EFB_HEIGHT / (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight()); // Adjust for multisampling if (g_ActiveConfig.iMultisamples > 1) result /= g_ActiveConfig.iMultisamples; m_results[entry.query_group].fetch_add(result, std::memory_order_relaxed); m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); m_query_count.fetch_sub(1, std::memory_order_relaxed); } // TODO: could selectively flush things, but I don't think that will do much void PerfQueryGL::FlushResults() { while (!IsFlushed()) FlushOne(); } PerfQueryGLESNV::PerfQueryGLESNV() { for (ActiveQuery& query : m_query_buffer) glGenOcclusionQueriesNV(1, &query.query_id); } PerfQueryGLESNV::~PerfQueryGLESNV() { for (ActiveQuery& query : m_query_buffer) glDeleteOcclusionQueriesNV(1, &query.query_id); } void PerfQueryGLESNV::EnableQuery(PerfQueryGroup group) { u32 query_count = m_query_count.load(std::memory_order_relaxed); // Is this sane? if (query_count > m_query_buffer.size() / 2) { WeakFlush(); query_count = m_query_count.load(std::memory_order_relaxed); } if (m_query_buffer.size() == query_count) { FlushOne(); query_count = m_query_count.load(std::memory_order_relaxed); // ERROR_LOG_FMT(VIDEO, "Flushed query buffer early!"); } // start query if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()]; glBeginOcclusionQueryNV(entry.query_id); entry.query_group = group; m_query_count.fetch_add(1, std::memory_order_relaxed); } } void PerfQueryGLESNV::DisableQuery(PerfQueryGroup group) { // stop query if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { glEndOcclusionQueryNV(); } } void PerfQueryGLESNV::WeakFlush() { while (!IsFlushed()) { auto& entry = m_query_buffer[m_query_read_pos]; GLuint result = GL_FALSE; glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result); if (GL_TRUE == result) { FlushOne(); } else { break; } } } void PerfQueryGLESNV::FlushOne() { auto& entry = m_query_buffer[m_query_read_pos]; GLuint result = 0; glGetOcclusionQueryuivNV(entry.query_id, GL_OCCLUSION_TEST_RESULT_HP, &result); // NOTE: Reported pixel metrics should be referenced to native resolution // TODO: Dropping the lower 2 bits from this count should be closer to actual // hardware behavior when drawing triangles. const u64 native_res_result = static_cast(result) * EFB_WIDTH * EFB_HEIGHT / (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight()); m_results[entry.query_group].fetch_add(static_cast(native_res_result), std::memory_order_relaxed); m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); m_query_count.fetch_sub(1, std::memory_order_relaxed); } // TODO: could selectively flush things, but I don't think that will do much void PerfQueryGLESNV::FlushResults() { while (!IsFlushed()) FlushOne(); } } // namespace OGL