Fix OGL perf queries and make them not slow!
This commit is contained in:
parent
54947b1e22
commit
53aec6c476
|
@ -22,6 +22,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "ChunkFile.h"
|
||||
#include "PerfQueryBase.h"
|
||||
|
||||
typedef void (*writeFn16)(const u16,const u32);
|
||||
typedef void (*writeFn32)(const u32,const u32);
|
||||
|
@ -107,6 +108,7 @@ public:
|
|||
virtual void Video_EndField() = 0;
|
||||
|
||||
virtual u32 Video_AccessEFB(EFBAccessType, u32, u32, u32) = 0;
|
||||
virtual u32 Video_GetQueryResult(PerfQueryType type) = 0;
|
||||
|
||||
virtual void Video_AddMessage(const char* pstr, unsigned int milliseconds) = 0;
|
||||
virtual void Video_ClearMessages() = 0;
|
||||
|
@ -154,7 +156,9 @@ class VideoBackendHardware : public VideoBackend
|
|||
void Video_ExitLoop();
|
||||
void Video_BeginField(u32, FieldType, u32, u32);
|
||||
void Video_EndField();
|
||||
|
||||
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
|
||||
u32 Video_GetQueryResult(PerfQueryType type);
|
||||
|
||||
void Video_AddMessage(const char* pstr, unsigned int milliseconds);
|
||||
void Video_ClearMessages();
|
||||
|
|
|
@ -21,6 +21,10 @@ volatile u32 s_swapRequested = false;
|
|||
u32 s_efbAccessRequested = false;
|
||||
volatile u32 s_FifoShuttingDown = false;
|
||||
|
||||
// Handshake state for perf query readback requests.
// Video_GetQueryResult() sets s_perf_query_requested and then blocks on
// s_perf_query_cond; VideoFifo_CheckPerfQueryRequest() flushes the results,
// clears the flag while holding s_perf_query_lock and notifies the waiter.

// Signalled once a requested flush has completed.
std::condition_variable s_perf_query_cond;
|
||||
// Mutex paired with s_perf_query_cond; held while the request flag is cleared.
std::mutex s_perf_query_lock;
|
||||
// True while a flush request is outstanding.
static volatile bool s_perf_query_requested;
|
||||
|
||||
static volatile struct
|
||||
{
|
||||
u32 xfbAddr;
|
||||
|
@ -169,6 +173,43 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool QueryResultIsReady()
|
||||
{
|
||||
return !s_perf_query_requested || s_FifoShuttingDown;
|
||||
}
|
||||
|
||||
void VideoFifo_CheckPerfQueryRequest()
|
||||
{
|
||||
if (s_perf_query_requested)
|
||||
{
|
||||
g_perf_query->FlushResults();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(s_perf_query_lock);
|
||||
s_perf_query_requested = false;
|
||||
}
|
||||
|
||||
s_perf_query_cond.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the current value of the given perf query counter.
//
// If the backend still has unflushed query results they are flushed first.
// With bCPUThread enabled the flush is requested through
// s_perf_query_requested and this function blocks until
// VideoFifo_CheckPerfQueryRequest() has serviced the request (or the FIFO is
// shutting down); otherwise the flush is performed directly.
u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
{
	// TODO: unclear whether IsFlushed() is a sane condition to test here.
	const bool results_pending = !g_perf_query->IsFlushed();

	if (results_pending)
	{
		const bool dual_core = SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;

		if (!dual_core)
		{
			g_perf_query->FlushResults();
		}
		else
		{
			// Raise the request before taking the lock, then sleep until
			// QueryResultIsReady() reports that the request was handled.
			s_perf_query_requested = true;
			std::unique_lock<std::mutex> wait_lock(s_perf_query_lock);
			s_perf_query_cond.wait(wait_lock, QueryResultIsReady);
		}
	}

	return g_perf_query->GetQueryResult(type);
}
|
||||
|
||||
void VideoBackendHardware::InitializeShared()
|
||||
{
|
||||
|
@ -176,6 +217,7 @@ void VideoBackendHardware::InitializeShared()
|
|||
|
||||
s_swapRequested = 0;
|
||||
s_efbAccessRequested = 0;
|
||||
s_perf_query_requested = false;
|
||||
s_FifoShuttingDown = 0;
|
||||
memset((void*)&s_beginFieldArgs, 0, sizeof(s_beginFieldArgs));
|
||||
memset(&s_accessEFBArgs, 0, sizeof(s_accessEFBArgs));
|
||||
|
@ -223,6 +265,7 @@ void VideoFifo_CheckAsyncRequest()
|
|||
{
|
||||
VideoFifo_CheckSwapRequest();
|
||||
VideoFifo_CheckEFBAccess();
|
||||
VideoFifo_CheckPerfQueryRequest();
|
||||
}
|
||||
|
||||
void VideoBackendHardware::Video_GatherPipeBursted()
|
||||
|
|
|
@ -32,6 +32,8 @@ public:
|
|||
// Ends the query for the given group. The base implementation does nothing; backends override it.
virtual void DisableQuery(PerfQueryGroup type) {}
|
||||
// Resets the query state. The base implementation does nothing; backends override it.
virtual void ResetQuery() {}
|
||||
// Returns the result for the given query type. The base implementation always reports 0.
virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
|
||||
// Retrieves all outstanding query results. The base implementation has nothing to flush.
virtual void FlushResults() {}
|
||||
// Reports whether no query results are outstanding. The base implementation is always flushed.
virtual bool IsFlushed() const { return true; }
|
||||
};
|
||||
|
||||
extern PerfQueryBase* g_perf_query;
|
||||
|
|
|
@ -260,35 +260,35 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
|
|||
|
||||
// NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't be contradictory to the value of that register (i.e. INPUT registers should always be greater than or equal to their corresponding OUTPUT registers).
|
||||
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_L:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_INPUT_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_L:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_ZCOMP_OUTPUT_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_BLEND_INPUT_L:
|
||||
|
@ -298,19 +298,20 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
|
|||
// In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00).
|
||||
// Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all.
|
||||
// Note that currently this functionality is only implemented in the D3D11 backend.
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF;
|
||||
//ERROR_LOG(VIDEO, "PQ_BLEND_INPUT: %d", g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT));
|
||||
break;
|
||||
|
||||
case PE_PERF_BLEND_INPUT_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) >> 16;
|
||||
break;
|
||||
|
||||
case PE_PERF_EFB_COPY_CLOCKS_L:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF;
|
||||
break;
|
||||
|
||||
case PE_PERF_EFB_COPY_CLOCKS_H:
|
||||
_uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16;
|
||||
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -1,31 +1,47 @@
|
|||
#include "GLUtil.h"
|
||||
#include "PerfQuery.h"
|
||||
|
||||
namespace OGL {
|
||||
|
||||
u32 results[PQG_NUM_MEMBERS] = { 0 };
|
||||
GLuint query_id;
|
||||
|
||||
PerfQueryGroup active_query;
|
||||
namespace OGL
|
||||
{
|
||||
|
||||
PerfQuery::PerfQuery()
|
||||
: m_query_read_pos()
|
||||
, m_query_count()
|
||||
{
|
||||
glGenQueries(1, &query_id);
|
||||
for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
|
||||
glGenQueries(1, &m_query_buffer[i].query_id);
|
||||
|
||||
ResetQuery();
|
||||
}
|
||||
|
||||
PerfQuery::~PerfQuery()
|
||||
{
|
||||
glDeleteQueries(1, &query_id);
|
||||
for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
|
||||
glDeleteQueries(1, &m_query_buffer[i].query_id);
|
||||
}
|
||||
|
||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
||||
{
|
||||
// Is this sane?
|
||||
if (m_query_count > ARRAYSIZE(m_query_buffer) / 2)
|
||||
WeakFlush();
|
||||
|
||||
if (ARRAYSIZE(m_query_buffer) == m_query_count)
|
||||
{
|
||||
FlushOne();
|
||||
//ERROR_LOG(VIDEO, "flushed query buffer early!");
|
||||
}
|
||||
|
||||
// start query
|
||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||
{
|
||||
glBeginQuery(GL_SAMPLES_PASSED, query_id);
|
||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer)];
|
||||
|
||||
glBeginQuery(GL_SAMPLES_PASSED, entry.query_id);
|
||||
entry.query_type = type;
|
||||
|
||||
++m_query_count;
|
||||
}
|
||||
active_query = type;
|
||||
}
|
||||
|
||||
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
||||
|
@ -34,45 +50,82 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
||||
{
|
||||
glEndQuery(GL_SAMPLES_PASSED);
|
||||
}
|
||||
}
|
||||
|
||||
GLuint query_result = GL_FALSE;
|
||||
while (query_result != GL_TRUE)
|
||||
bool PerfQuery::IsFlushed() const
|
||||
{
|
||||
return 0 == m_query_count;
|
||||
}
|
||||
|
||||
void PerfQuery::FlushOne()
|
||||
{
|
||||
auto& entry = m_query_buffer[m_query_read_pos];
|
||||
|
||||
GLuint result = 0;
|
||||
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result);
|
||||
|
||||
m_results[entry.query_type] += result;
|
||||
|
||||
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
|
||||
--m_query_count;
|
||||
}
|
||||
|
||||
// TODO: could selectively flush things, but I don't think that will do much
|
||||
void PerfQuery::FlushResults()
|
||||
{
|
||||
while (!IsFlushed())
|
||||
FlushOne();
|
||||
}
|
||||
|
||||
void PerfQuery::WeakFlush()
|
||||
{
|
||||
while (!IsFlushed())
|
||||
{
|
||||
auto& entry = m_query_buffer[m_query_read_pos];
|
||||
|
||||
GLuint result = GL_FALSE;
|
||||
glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result);
|
||||
|
||||
if (GL_TRUE == result)
|
||||
{
|
||||
glGetQueryObjectuiv(query_id, GL_QUERY_RESULT_AVAILABLE, &query_result);
|
||||
FlushOne();
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
glGetQueryObjectuiv(query_id, GL_QUERY_RESULT, &query_result);
|
||||
|
||||
results[active_query] += query_result;
|
||||
}
|
||||
}
|
||||
|
||||
void PerfQuery::ResetQuery()
|
||||
{
|
||||
memset(results, 0, sizeof(results));
|
||||
m_query_count = 0;
|
||||
std::fill_n(m_results, ARRAYSIZE(m_results), 0);
|
||||
}
|
||||
|
||||
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
||||
{
|
||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC || type == PQ_BLEND_INPUT)
|
||||
{
|
||||
u32 result = 0;
|
||||
|
||||
}
|
||||
if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT || type == PQ_BLEND_INPUT)
|
||||
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
||||
{
|
||||
|
||||
result = m_results[PQG_ZCOMP_ZCOMPLOC];
|
||||
}
|
||||
if (type == PQ_BLEND_INPUT)
|
||||
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
||||
{
|
||||
results[PQ_BLEND_INPUT] = results[PQ_ZCOMP_OUTPUT] + results[PQ_ZCOMP_OUTPUT_ZCOMPLOC];
|
||||
result = m_results[PQG_ZCOMP];
|
||||
}
|
||||
else if (type == PQ_BLEND_INPUT)
|
||||
{
|
||||
result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
|
||||
}
|
||||
else if (type == PQ_EFB_COPY_CLOCKS)
|
||||
{
|
||||
result = m_results[PQG_EFB_COPY_CLOCKS];
|
||||
}
|
||||
|
||||
if (type == PQ_EFB_COPY_CLOCKS)
|
||||
{
|
||||
// TODO
|
||||
}
|
||||
|
||||
return results[type];
|
||||
return result / 4;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -15,6 +15,30 @@ public:
|
|||
void DisableQuery(PerfQueryGroup type);
|
||||
void ResetQuery();
|
||||
u32 GetQueryResult(PerfQueryType type);
|
||||
void FlushResults();
|
||||
bool IsFlushed() const;
|
||||
|
||||
private:
|
||||
// One slot of the query ring buffer: a GL query object together with the
// query group whose result total it contributes to.
struct ActiveQuery
|
||||
{
|
||||
// GL query object name; passed to glBeginQuery / glGetQueryObjectuiv.
GLuint query_id;
|
||||
// Group recorded when the query was started; indexes m_results on flush.
PerfQueryGroup query_type;
|
||||
};
|
||||
|
||||
// when testing in SMS: 64 was too small, 128 was ok
|
||||
static const int PERF_QUERY_BUFFER_SIZE = 512;
|
||||
|
||||
void WeakFlush();
|
||||
// Only use when non-empty
|
||||
void FlushOne();
|
||||
|
||||
// This contains gl query objects with unretrieved results.
|
||||
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
|
||||
int m_query_read_pos;
|
||||
|
||||
// TODO: sloppy
|
||||
volatile int m_query_count;
|
||||
volatile u32 m_results[PQG_NUM_MEMBERS];
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -211,7 +211,7 @@ void VertexManager::vFlush()
|
|||
g_perf_query->EnableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
Draw();
|
||||
g_perf_query->DisableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT));
|
||||
//ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT));
|
||||
|
||||
// run through vertex groups again to set alpha
|
||||
if (useDstAlpha && !dualSourcePossible)
|
||||
|
|
|
@ -176,6 +176,12 @@ u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputDa
|
|||
return value;
|
||||
}
|
||||
|
||||
// Perf queries are not implemented for the software backend yet, so every
// query type reports zero.
u32 VideoSoftware::Video_GetQueryResult(PerfQueryType type)
{
	// TODO: implement perf query results for this backend.
	const u32 no_result = 0;
	return no_result;
}
|
||||
|
||||
bool VideoSoftware::Video_Screenshot(const char *_szFilename)
|
||||
{
|
||||
return false;
|
||||
|
|
|
@ -26,7 +26,9 @@ class VideoSoftware : public VideoBackend
|
|||
void Video_ExitLoop();
|
||||
void Video_BeginField(u32, FieldType, u32, u32);
|
||||
void Video_EndField();
|
||||
|
||||
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32);
|
||||
u32 Video_GetQueryResult(PerfQueryType type);
|
||||
|
||||
void Video_AddMessage(const char* pstr, unsigned int milliseconds);
|
||||
void Video_ClearMessages();
|
||||
|
|
Loading…
Reference in New Issue