Implement full occlusion queries for the Nexus 9.

GLES3 spec is worthless and only returns a boolean result for occlusion queries. This is fine for simple cellular games but we need more than a
boolean result.
Thankfully Nvidia exposes GL_NV_occlusion_queries under a OpenGL ES extension, which allows us to get full samples rendered.
The only device this change affects is the Nexus 9, since it is an Nvidia K1 crippled to only support OpenGL ES.
No other OpenGL ES device that I know of supports this extension.
This commit is contained in:
Ryan Houdek 2015-02-21 16:58:53 -06:00
parent a5b4ac6faa
commit e9ac4d53a6
3 changed files with 232 additions and 77 deletions

View File

@ -9,54 +9,32 @@
namespace OGL
{
PerfQueryBase* GetPerfQuery()
{
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3 &&
GLExtensions::Supports("GL_NV_occlusion_query_samples"))
return new PerfQueryGLESNV();
else if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
return new PerfQueryGL(GL_ANY_SAMPLES_PASSED);
else
return new PerfQueryGL(GL_SAMPLES_PASSED);
}
PerfQuery::PerfQuery()
: m_query_read_pos()
, m_query_count()
{
for (ActiveQuery& query : m_query_buffer)
glGenQueries(1, &query.query_id);
ResetQuery();
}
PerfQuery::~PerfQuery()
{
for (ActiveQuery& query : m_query_buffer)
glDeleteQueries(1, &query.query_id);
}
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
WeakFlush();
if (m_query_buffer.size() == m_query_count)
{
FlushOne();
//ERROR_LOG(VIDEO, "Flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
glBeginQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED, entry.query_id);
entry.query_type = type;
++m_query_count;
}
m_query->EnableQuery(type);
}
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED);
}
m_query->DisableQuery(type);
}
bool PerfQuery::IsFlushed() const
@ -64,45 +42,10 @@ bool PerfQuery::IsFlushed() const
return 0 == m_query_count;
}
void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
while (!IsFlushed())
FlushOne();
}
void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
m_query->FlushResults();
}
void PerfQuery::ResetQuery()
@ -135,4 +78,176 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
return result / 4;
}
// Implementations
PerfQueryGL::PerfQueryGL(GLenum query_type)
: m_query_type(query_type)
{
for (ActiveQuery& query : m_query_buffer)
glGenQueries(1, &query.query_id);
}
PerfQueryGL::~PerfQueryGL()
{
for (ActiveQuery& query : m_query_buffer)
glDeleteQueries(1, &query.query_id);
}
void PerfQueryGL::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
WeakFlush();
if (m_query_buffer.size() == m_query_count)
{
FlushOne();
//ERROR_LOG(VIDEO, "Flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
glBeginQuery(m_query_type, entry.query_id);
entry.query_type = type;
++m_query_count;
}
}
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndQuery(m_query_type);
}
}
void PerfQueryGL::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
}
void PerfQueryGL::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQueryGL::FlushResults()
{
while (!IsFlushed())
FlushOne();
}
PerfQueryGLESNV::PerfQueryGLESNV()
{
for (ActiveQuery& query : m_query_buffer)
glGenOcclusionQueriesNV(1, &query.query_id);
}
PerfQueryGLESNV::~PerfQueryGLESNV()
{
for (ActiveQuery& query : m_query_buffer)
glDeleteOcclusionQueriesNV(1, &query.query_id);
}
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
{
// Is this sane?
if (m_query_count > m_query_buffer.size() / 2)
WeakFlush();
if (m_query_buffer.size() == m_query_count)
{
FlushOne();
//ERROR_LOG(VIDEO, "Flushed query buffer early!");
}
// start query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()];
glBeginOcclusionQueryNV(entry.query_id);
entry.query_type = type;
++m_query_count;
}
}
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
{
// stop query
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{
glEndOcclusionQueryNV();
}
}
void PerfQueryGLESNV::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = GL_FALSE;
glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result);
if (GL_TRUE == result)
{
FlushOne();
}
else
{
break;
}
}
}
void PerfQueryGLESNV::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
GLuint result = 0;
glGetOcclusionQueryuivNV(entry.query_id, GL_OCCLUSION_TEST_RESULT_HP, &result);
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
--m_query_count;
}
// TODO: could selectively flush things, but I don't think that will do much
void PerfQueryGLESNV::FlushResults()
{
while (!IsFlushed())
FlushOne();
}
} // namespace

View File

@ -1,18 +1,20 @@
#pragma once
#include <array>
#include <memory>
#include "VideoBackends/OGL/GLExtensions/GLExtensions.h"
#include "VideoCommon/PerfQueryBase.h"
namespace OGL
{
PerfQueryBase* GetPerfQuery();
class PerfQuery : public PerfQueryBase
{
public:
PerfQuery();
~PerfQuery();
~PerfQuery() {}
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
@ -21,7 +23,7 @@ public:
void FlushResults() override;
bool IsFlushed() const override;
private:
protected:
struct ActiveQuery
{
GLuint query_id;
@ -31,10 +33,6 @@ private:
// when testing in SMS: 64 was too small, 128 was ok
static const u32 PERF_QUERY_BUFFER_SIZE = 512;
void WeakFlush();
// Only use when non-empty
void FlushOne();
// This contains gl query objects with unretrieved results.
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer;
u32 m_query_read_pos;
@ -42,6 +40,48 @@ private:
// TODO: sloppy
volatile u32 m_query_count;
volatile u32 m_results[PQG_NUM_MEMBERS];
private:
// Implementation
std::unique_ptr<PerfQuery> m_query;
};
// Implementations
class PerfQueryGL : public PerfQuery
{
public:
PerfQueryGL(GLenum query_type);
~PerfQueryGL();
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void FlushResults() override;
private:
void WeakFlush();
// Only use when non-empty
void FlushOne();
GLenum m_query_type;
};
class PerfQueryGLESNV : public PerfQuery
{
public:
PerfQueryGLESNV();
~PerfQueryGLESNV();
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void FlushResults() override;
private:
void WeakFlush();
// Only use when non-empty
void FlushOne();
};
} // namespace

View File

@ -199,7 +199,7 @@ void VideoBackend::Video_Prepare()
BPInit();
g_vertex_manager = new VertexManager;
g_perf_query = new PerfQuery;
g_perf_query = GetPerfQuery();
Fifo_Init(); // must be done before OpcodeDecoder_Init()
OpcodeDecoder_Init();
IndexGenerator::Init();