From e9ac4d53a6b50e61375ffc1994d7d278e4c94e7a Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 21 Feb 2015 16:58:53 -0600 Subject: [PATCH] Implement full occlusion queries for the Nexus 9. GLES3 spec is worthless and only returns a boolean result for occlusion queries. This is fine for simple cellular games but we need more than a boolean result. Thankfully Nvidia exposes GL_NV_occlusion_query_samples as an OpenGL ES extension, which allows us to get the full count of samples rendered. The only device this change affects is the Nexus 9, since it is an Nvidia K1 crippled to only support OpenGL ES. No other OpenGL ES device that I know of supports this extension. --- Source/Core/VideoBackends/OGL/PerfQuery.cpp | 255 ++++++++++++++------ Source/Core/VideoBackends/OGL/PerfQuery.h | 52 +++- Source/Core/VideoBackends/OGL/main.cpp | 2 +- 3 files changed, 232 insertions(+), 77 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/PerfQuery.cpp b/Source/Core/VideoBackends/OGL/PerfQuery.cpp index 178ef7108e..372cd17cf8 100644 --- a/Source/Core/VideoBackends/OGL/PerfQuery.cpp +++ b/Source/Core/VideoBackends/OGL/PerfQuery.cpp @@ -9,54 +9,32 @@ namespace OGL { +PerfQueryBase* GetPerfQuery() +{ + if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3 && + GLExtensions::Supports("GL_NV_occlusion_query_samples")) + return new PerfQueryGLESNV(); + else if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) + return new PerfQueryGL(GL_ANY_SAMPLES_PASSED); + else + return new PerfQueryGL(GL_SAMPLES_PASSED); +} PerfQuery::PerfQuery() : m_query_read_pos() , m_query_count() { - for (ActiveQuery& query : m_query_buffer) - glGenQueries(1, &query.query_id); - ResetQuery(); } -PerfQuery::~PerfQuery() -{ - for (ActiveQuery& query : m_query_buffer) - glDeleteQueries(1, &query.query_id); -} - void PerfQuery::EnableQuery(PerfQueryGroup type) { - // Is this sane? 
- if (m_query_count > m_query_buffer.size() / 2) - WeakFlush(); - - if (m_query_buffer.size() == m_query_count) - { - FlushOne(); - //ERROR_LOG(VIDEO, "Flushed query buffer early!"); - } - - // start query - if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) - { - auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; - - glBeginQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED, entry.query_id); - entry.query_type = type; - - ++m_query_count; - } + m_query->EnableQuery(type); } void PerfQuery::DisableQuery(PerfQueryGroup type) { - // stop query - if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) - { - glEndQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED); - } + m_query->DisableQuery(type); } bool PerfQuery::IsFlushed() const @@ -64,45 +42,10 @@ bool PerfQuery::IsFlushed() const return 0 == m_query_count; } -void PerfQuery::FlushOne() -{ - auto& entry = m_query_buffer[m_query_read_pos]; - - GLuint result = 0; - glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); - - // NOTE: Reported pixel metrics should be referenced to native resolution - m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); - - m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); - --m_query_count; -} - // TODO: could selectively flush things, but I don't think that will do much void PerfQuery::FlushResults() { - while (!IsFlushed()) - FlushOne(); -} - -void PerfQuery::WeakFlush() -{ - while (!IsFlushed()) - { - auto& entry = m_query_buffer[m_query_read_pos]; - - GLuint result = GL_FALSE; - glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); - - if (GL_TRUE == result) - { - FlushOne(); - } - else - { - break; - } - } + m_query->FlushResults(); } void PerfQuery::ResetQuery() @@ -135,4 +78,176 @@ u32 
PerfQuery::GetQueryResult(PerfQueryType type) return result / 4; } +// Implementations +PerfQueryGL::PerfQueryGL(GLenum query_type) + : m_query_type(query_type) +{ + for (ActiveQuery& query : m_query_buffer) + glGenQueries(1, &query.query_id); +} + +PerfQueryGL::~PerfQueryGL() +{ + for (ActiveQuery& query : m_query_buffer) + glDeleteQueries(1, &query.query_id); +} + +void PerfQueryGL::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? + if (m_query_count > m_query_buffer.size() / 2) + WeakFlush(); + + if (m_query_buffer.size() == m_query_count) + { + FlushOne(); + //ERROR_LOG(VIDEO, "Flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; + + glBeginQuery(m_query_type, entry.query_id); + entry.query_type = type; + + ++m_query_count; + } +} +void PerfQueryGL::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glEndQuery(m_query_type); + } +} + +void PerfQueryGL::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = GL_FALSE; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); + + if (GL_TRUE == result) + { + FlushOne(); + } + else + { + break; + } + } +} + +void PerfQueryGL::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = 0; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQueryGL::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); 
+} + +PerfQueryGLESNV::PerfQueryGLESNV() +{ + for (ActiveQuery& query : m_query_buffer) + glGenOcclusionQueriesNV(1, &query.query_id); +} + +PerfQueryGLESNV::~PerfQueryGLESNV() +{ + for (ActiveQuery& query : m_query_buffer) + glDeleteOcclusionQueriesNV(1, &query.query_id); +} + +void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? + if (m_query_count > m_query_buffer.size() / 2) + WeakFlush(); + + if (m_query_buffer.size() == m_query_count) + { + FlushOne(); + //ERROR_LOG(VIDEO, "Flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; + + glBeginOcclusionQueryNV(entry.query_id); + entry.query_type = type; + + ++m_query_count; + } +} +void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glEndOcclusionQueryNV(); + } +} + +void PerfQueryGLESNV::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = GL_FALSE; + glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result); + + if (GL_TRUE == result) + { + FlushOne(); + } + else + { + break; + } + } +} + +void PerfQueryGLESNV::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = 0; + glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_NV, &result); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQueryGLESNV::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + } // namespace diff --git 
a/Source/Core/VideoBackends/OGL/PerfQuery.h b/Source/Core/VideoBackends/OGL/PerfQuery.h index ae1e5ddf59..aaf6863eac 100644 --- a/Source/Core/VideoBackends/OGL/PerfQuery.h +++ b/Source/Core/VideoBackends/OGL/PerfQuery.h @@ -1,18 +1,20 @@ #pragma once #include <array> +#include <memory> #include "VideoBackends/OGL/GLExtensions/GLExtensions.h" #include "VideoCommon/PerfQueryBase.h" namespace OGL { +PerfQueryBase* GetPerfQuery(); class PerfQuery : public PerfQueryBase { public: PerfQuery(); - ~PerfQuery(); + ~PerfQuery() {} void EnableQuery(PerfQueryGroup type) override; void DisableQuery(PerfQueryGroup type) override; @@ -21,7 +23,7 @@ public: void FlushResults() override; bool IsFlushed() const override; -private: +protected: struct ActiveQuery { GLuint query_id; @@ -31,10 +33,6 @@ private: // when testing in SMS: 64 was too small, 128 was ok static const u32 PERF_QUERY_BUFFER_SIZE = 512; - void WeakFlush(); - // Only use when non-empty - void FlushOne(); - // This contains gl query objects with unretrieved results. 
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer; u32 m_query_read_pos; @@ -42,6 +40,48 @@ private: // TODO: sloppy volatile u32 m_query_count; volatile u32 m_results[PQG_NUM_MEMBERS]; + +private: + // Implementation + std::unique_ptr<PerfQuery> m_query; }; +// Implementations +class PerfQueryGL : public PerfQuery +{ +public: + PerfQueryGL(GLenum query_type); + ~PerfQueryGL(); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void FlushResults() override; + +private: + + void WeakFlush(); + // Only use when non-empty + void FlushOne(); + + GLenum m_query_type; +}; + +class PerfQueryGLESNV : public PerfQuery +{ +public: + PerfQueryGLESNV(); + ~PerfQueryGLESNV(); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void FlushResults() override; + +private: + + void WeakFlush(); + // Only use when non-empty + void FlushOne(); +}; + + } // namespace diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 83ab7593e7..9ab73a6e12 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -199,7 +199,7 @@ void VideoBackend::Video_Prepare() BPInit(); g_vertex_manager = new VertexManager; - g_perf_query = new PerfQuery; + g_perf_query = GetPerfQuery(); Fifo_Init(); // must be done before OpcodeDecoder_Init() OpcodeDecoder_Init(); IndexGenerator::Init();