diff --git a/Source/Core/VideoCommon/Src/RenderBase.h b/Source/Core/VideoCommon/Src/RenderBase.h
index 7f2853bcc3..55678f3f5a 100644
--- a/Source/Core/VideoCommon/Src/RenderBase.h
+++ b/Source/Core/VideoCommon/Src/RenderBase.h
@@ -128,11 +128,6 @@ public:
static unsigned int GetPrevPixelFormat() { return prev_efb_format; }
static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; }
- virtual void ResetPixelPerf() {};
- virtual void ResumePixelPerf(bool efb_copies) {};
- virtual void PausePixelPerf(bool efb_copies) {};
- virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; };
-
// TODO: doesn't belong here
virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0;
virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0;
diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
index 86b2e03221..330b23d370 100644
--- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
+++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
@@ -101,6 +101,9 @@
Base
+
+ Base
+
Base
@@ -113,8 +116,6 @@
Shader Generators
-
- Base
Util
@@ -239,6 +240,9 @@
Base
+
+ Base
+
Base
@@ -251,8 +255,6 @@
Shader Generators
-
- Base
Util
diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj
index 52d1c37aed..aab9345ef7 100644
--- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj
+++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj
@@ -199,6 +199,7 @@
+
@@ -228,6 +229,7 @@
+
diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters
index 6492e887ca..4b8efac92b 100644
--- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters
+++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters
@@ -57,6 +57,9 @@
Render
+
+ Render
+
@@ -117,6 +120,9 @@
Render
+
+ Render
+
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp
new file mode 100644
index 0000000000..6ab91fed27
--- /dev/null
+++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp
@@ -0,0 +1,148 @@
+#include "RenderBase.h"
+
+#include "D3DBase.h"
+#include "PerfQuery.h"
+
+namespace DX11 {
+
+// Creates one occlusion query object per ring-buffer slot and clears the
+// accumulated counters.
+//
+// A slot whose query object could not be created is logged and left as NULL
+// instead of holding an indeterminate pointer, so that the destructor can
+// safely skip it.
+PerfQuery::PerfQuery()
+    : m_query_read_pos()
+    , m_query_count()
+{
+    // The descriptor is the same for every slot, so build it only once.
+    const D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
+
+    for (unsigned int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
+    {
+        ActiveQuery& entry = m_query_buffer[i];
+
+        // Give the slot a defined state before asking D3D to fill it in.
+        entry.query = NULL;
+        entry.query_type = PQG_ZCOMP;
+
+        const HRESULT hr = D3D::device->CreateQuery(&qdesc, &entry.query);
+        if (FAILED(hr))
+        {
+            ERROR_LOG(VIDEO, "failed to create occlusion query %u (hr=0x%08x)", i, (unsigned int)hr);
+            entry.query = NULL;
+        }
+    }
+    ResetQuery();
+}
+
+// Releases every query object that was successfully created.
+PerfQuery::~PerfQuery()
+{
+    for (unsigned int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
+    {
+        // TODO: EndQuery?
+        if (m_query_buffer[i].query != NULL)
+        {
+            m_query_buffer[i].query->Release();
+            m_query_buffer[i].query = NULL;
+        }
+    }
+}
+
+// Begins an occlusion query for the given group in the next free slot of the
+// ring buffer. Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP start a query; the
+// buffer maintenance at the top runs for every group.
+void PerfQuery::EnableQuery(PerfQueryGroup type)
+{
+    const auto capacity = ARRAYSIZE(m_query_buffer);
+
+    // Is this sane?
+    // Once more than half of the slots are in use, collect whatever results
+    // are already available.
+    if (m_query_count > capacity / 2)
+        WeakFlush();
+
+    // No free slot left: wait for the oldest query so that one becomes free.
+    if (capacity == m_query_count)
+    {
+        // TODO
+        FlushOne();
+        ERROR_LOG(VIDEO, "flushed query buffer early!");
+    }
+
+    const bool is_zcomp_group = (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP);
+    if (!is_zcomp_group)
+        return;
+
+    // start query
+    // The write position sits m_query_count slots behind the read position.
+    auto& slot = m_query_buffer[(m_query_read_pos + m_query_count) % capacity];
+
+    D3D::context->Begin(slot.query);
+    slot.query_type = type;
+
+    ++m_query_count;
+}
+
+// Ends the most recently begun query. Does nothing for groups other than
+// PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP.
+void PerfQuery::DisableQuery(PerfQueryGroup type)
+{
+    if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
+        return;
+
+    // stop query
+    // The newest slot is one before the write position; the capacity is added
+    // before subtracting one so the index stays non-negative prior to the modulo.
+    const auto capacity = ARRAYSIZE(m_query_buffer);
+    auto& newest = m_query_buffer[(m_query_read_pos + m_query_count + capacity - 1) % capacity];
+    D3D::context->End(newest.query);
+}
+
+// Marks the ring buffer as empty and zeroes the counter of every query group.
+// Results of queries that were still pending are not collected.
+void PerfQuery::ResetQuery()
+{
+    m_query_count = 0;
+
+    for (unsigned int group = 0; group != ARRAYSIZE(m_results); ++group)
+        m_results[group] = 0;
+}
+
+// Maps a requested counter onto the accumulated per-group results and returns
+// that value divided by four. Unhandled counter types yield zero.
+// NOTE(review): the division presumably converts pixels to 2x2 quads - confirm.
+u32 PerfQuery::GetQueryResult(PerfQueryType type)
+{
+    u32 total;
+
+    switch (type)
+    {
+    case PQ_ZCOMP_INPUT_ZCOMPLOC:
+    case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
+        total = m_results[PQG_ZCOMP_ZCOMPLOC];
+        break;
+
+    case PQ_ZCOMP_INPUT:
+    case PQ_ZCOMP_OUTPUT:
+        total = m_results[PQG_ZCOMP];
+        break;
+
+    case PQ_BLEND_INPUT:
+        // Sum of both z-compare groups.
+        total = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
+        break;
+
+    case PQ_EFB_COPY_CLOCKS:
+        total = m_results[PQG_EFB_COPY_CLOCKS];
+        break;
+
+    default:
+        total = 0;
+        break;
+    }
+
+    return total / 4;
+}
+
+// Waits for the oldest pending query to produce its result, adds that result
+// (rescaled from the render target size to the EFB size) to the counter of
+// the query's group, and removes the query from the ring buffer.
+//
+// Must only be called while at least one query is pending.
+void PerfQuery::FlushOne()
+{
+    auto& entry = m_query_buffer[m_query_read_pos];
+
+    UINT64 result = 0;
+    HRESULT hr = S_FALSE;
+
+    // S_FALSE means the result is not available yet, so keep polling.
+    // A failure code (e.g. a removed device) never turns into S_OK, so
+    // polling on it would hang; leave the loop instead.
+    // TODO: can still spin if the GPU never completes the query.
+    while (hr == S_FALSE)
+    {
+        hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0);
+    }
+
+    if (hr == S_OK)
+    {
+        m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
+    }
+    else
+    {
+        // The result is lost; drop the query so the ring buffer keeps moving.
+        ERROR_LOG(VIDEO, "failed to read occlusion query result (hr=0x%08x)", (unsigned int)hr);
+    }
+
+    m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
+    --m_query_count;
+}
+
+// TODO: could selectively flush things, but I don't think that will do much
+// Collects the results of all pending queries, oldest first, waiting for each
+// one in turn until the ring buffer is empty.
+void PerfQuery::FlushResults()
+{
+    for (;;)
+    {
+        if (IsFlushed())
+            return;
+
+        FlushOne();
+    }
+}
+
+// Collects results from the oldest pending queries for as long as GetData
+// reports S_OK, and stops at the first query that does not. GetData is called
+// with D3D11_ASYNC_GETDATA_DONOTFLUSH, and no attempt is retried here.
+void PerfQuery::WeakFlush()
+{
+    for (; !IsFlushed(); )
+    {
+        auto& oldest = m_query_buffer[m_query_read_pos];
+
+        UINT64 samples = 0;
+        const HRESULT status = D3D::context->GetData(oldest.query, &samples, sizeof(samples), D3D11_ASYNC_GETDATA_DONOTFLUSH);
+
+        // Not ready (or failed): leave this and all newer queries pending.
+        if (status != S_OK)
+            return;
+
+        // Rescale from the render target size to the EFB size.
+        m_results[oldest.query_type] += samples * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
+
+        // Pop the slot from the front of the ring buffer.
+        m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
+        --m_query_count;
+    }
+}
+
+// Returns true when no query is waiting to have its result collected.
+bool PerfQuery::IsFlushed() const
+{
+    return m_query_count == 0;
+}
+
+
+} // namespace
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h
new file mode 100644
index 0000000000..b3709d1013
--- /dev/null
+++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h
@@ -0,0 +1,46 @@
+#ifndef _PERFQUERY_H_
+#define _PERFQUERY_H_
+
+#include "PerfQueryBase.h"
+
+namespace DX11 {
+
+// Direct3D 11 implementation of PerfQueryBase built on ID3D11Query objects.
+// Queries are kept in a fixed-size ring buffer; their results are summed into
+// one counter per PerfQueryGroup as they are collected.
+class PerfQuery : public PerfQueryBase
+{
+public:
+    PerfQuery();
+    ~PerfQuery();
+
+    // Begin / end a query for the given group.
+    void EnableQuery(PerfQueryGroup type);
+    void DisableQuery(PerfQueryGroup type);
+
+    // Empty the ring buffer and zero all counters.
+    void ResetQuery();
+
+    // Read a counter derived from the accumulated results.
+    u32 GetQueryResult(PerfQueryType type);
+
+    // Collect every pending result / report whether any is still pending.
+    void FlushResults();
+    bool IsFlushed() const;
+
+private:
+    // One ring-buffer slot: the D3D query object and the group it counts for.
+    struct ActiveQuery
+    {
+        ID3D11Query* query;
+        PerfQueryGroup query_type;
+    };
+
+    // Number of slots in the ring buffer.
+    // when testing in SMS: 64 was too small, 128 was ok
+    static const int PERF_QUERY_BUFFER_SIZE = 512;
+
+    // Collects the oldest pending result.
+    // Only use when non-empty
+    void FlushOne();
+
+    // Collects only the results that are already available.
+    void WeakFlush();
+
+    // Slots holding queries; the pending ones start at m_query_read_pos.
+    ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
+    int m_query_read_pos;
+
+    // TODO: sloppy
+    // Number of pending queries, and the per-group result counters.
+    volatile int m_query_count;
+    volatile u32 m_results[PQG_NUM_MEMBERS];
+};
+
+} // namespace
+
+#endif // _PERFQUERY_H_
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp
index 882d20f8e6..2dcfcd041c 100644
--- a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp
@@ -65,18 +65,6 @@ ID3D11RasterizerState* resetraststate = NULL;
static ID3D11Texture2D* s_screenshot_texture = NULL;
-// Using a vector of query objects to avoid flushing the gpu pipeline all the time
-// TODO: Could probably optimized further by using a ring buffer or something
-#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess
-std::vector pixel_perf_queries;
-static int pixel_perf_query_index = 0;
-
-static u64 pixel_perf = 0;
-static bool pixel_perf_active = false;
-static bool pixel_perf_dirty = false;
-
-ID3D11Query* gpu_finished_query = NULL;
-
// GX pipeline state
struct
@@ -170,9 +158,6 @@ void SetupDeviceObjects()
D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState");
s_screenshot_texture = NULL;
-
- D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0);
- D3D::device->CreateQuery(&qdesc, &gpu_finished_query);
}
// Kill off all device objects
@@ -180,12 +165,6 @@ void TeardownDeviceObjects()
{
delete g_framebuffer_manager;
- while (!pixel_perf_queries.empty())
- {
- SAFE_RELEASE(pixel_perf_queries.back());
- pixel_perf_queries.pop_back();
- }
- SAFE_RELEASE(gpu_finished_query);
SAFE_RELEASE(access_efb_cbuf);
SAFE_RELEASE(clearblendstates[0]);
SAFE_RELEASE(clearblendstates[1]);
@@ -232,11 +211,6 @@ Renderer::Renderer()
s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
- pixel_perf_query_index = 0;
- pixel_perf = 0;
- pixel_perf_active = false;
- pixel_perf_dirty = false;
-
SetupDeviceObjects();
@@ -660,112 +634,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
}
-void Renderer::ResetPixelPerf()
-{
- if (g_ActiveConfig.bDisablePixelPerf)
- return;
-
- if (pixel_perf_active)
- PausePixelPerf(false);
-
- pixel_perf_query_index = 0;
- pixel_perf = 0;
-}
-
-void Renderer::ResumePixelPerf(bool efb_copies)
-{
- if (g_ActiveConfig.bDisablePixelPerf)
- return;
-
- if (efb_copies)
- return;
-
- if(pixel_perf_active)
- return;
-
- if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES)
- {
- D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
- ID3D11Query* tmpquery = NULL;
- D3D::device->CreateQuery(&qdesc, &tmpquery);
- pixel_perf_queries.push_back(tmpquery);
- pixel_perf_query_index = pixel_perf_queries.size() - 1;
- }
- else if (pixel_perf_queries.size() < pixel_perf_query_index+1)
- {
- StorePixelPerfResult(PP_ZCOMP_OUTPUT);
- pixel_perf_query_index = 0;
- }
- // This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output.
- D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]);
- pixel_perf_active = true;
- pixel_perf_dirty = true;
-}
-
-void Renderer::PausePixelPerf(bool efb_copies)
-{
- if (g_ActiveConfig.bDisablePixelPerf)
- return;
-
- if(!pixel_perf_active)
- return;
-
- D3D::context->End(pixel_perf_queries[pixel_perf_query_index]);
- pixel_perf_query_index++;
- pixel_perf_active = false;
-}
-
-void Renderer::StorePixelPerfResult(PixelPerfQuery type)
-{
- // First, make sure the GPU has finished rendering so that query results are valid
- D3D::context->End(gpu_finished_query);
- BOOL gpu_finished = FALSE;
- while (!gpu_finished)
- {
- // If nothing goes horribly wrong here, this should complete in finite time...
- D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0);
- }
-
- for(int i = 0; i < pixel_perf_query_index; ++i)
- {
- UINT64 buf = 0;
- D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0);
-
- // Reported pixel metrics should be referenced to native resolution:
- pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight();
- }
- pixel_perf_dirty = false;
-}
-
-u32 Renderer::GetPixelPerfResult(PixelPerfQuery type)
-{
- if (g_ActiveConfig.bDisablePixelPerf)
- return 0;
-
- if (type == PP_EFB_COPY_CLOCKS)
- {
- // not implemented
- return 0;
- }
-
- if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L ||
- type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H ||
- type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L ||
- type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H)
- {
- // return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT
- // TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers.
- return 0;
- }
-
- // Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway
- if (pixel_perf_dirty)
- StorePixelPerfResult(PP_ZCOMP_OUTPUT);
-
- // Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels
- return std::min(pixel_perf / 4, (u64)0xFFFFFFFF);
-}
-
void SetSrcBlend(D3D11_BLEND val)
{
// Colors should blend against SRC_ALPHA
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.h b/Source/Plugins/Plugin_VideoDX11/Src/Render.h
index 6db829c205..8f6c78fae1 100644
--- a/Source/Plugins/Plugin_VideoDX11/Src/Render.h
+++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.h
@@ -46,12 +46,6 @@ public:
void ReinterpretPixelData(unsigned int convtype);
- void ResetPixelPerf();
- void ResumePixelPerf(bool efb_copies);
- void PausePixelPerf(bool efb_copies);
- u32 GetPixelPerfResult(PixelPerfQuery type);
- void StorePixelPerfResult(PixelPerfQuery type); // internal
-
void UpdateViewport(Matrix44& vpCorrection);
bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc);
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
index 8137e1a39f..6991b11690 100644
--- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
@@ -274,9 +274,9 @@ void VertexManager::vFlush()
g_nativeVertexFmt->SetupVertexPointers();
g_renderer->ApplyState(useDstAlpha);
- g_renderer->ResumePixelPerf(false);
+ g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
Draw(stride);
- g_renderer->PausePixelPerf(false);
+ g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp
index b7dd9101d3..af4d57dbf9 100644
--- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp
@@ -42,6 +42,7 @@
#include "D3DUtil.h"
#include "D3DBase.h"
+#include "PerfQuery.h"
#include "PixelShaderCache.h"
#include "TextureCache.h"
#include "VertexManager.h"
@@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare()
g_renderer = new Renderer;
g_texture_cache = new TextureCache;
g_vertex_manager = new VertexManager;
+ g_perf_query = new PerfQuery;
VertexShaderCache::Init();
PixelShaderCache::Init();
D3D::InitUtils();
@@ -227,6 +229,7 @@ void VideoBackend::Shutdown()
D3D::ShutdownUtils();
PixelShaderCache::Shutdown();
VertexShaderCache::Shutdown();
+ delete g_perf_query;
delete g_vertex_manager;
delete g_texture_cache;
delete g_renderer;
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h
index 76040272e3..34c64e43a1 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h
@@ -17,25 +17,25 @@ public:
u32 GetQueryResult(PerfQueryType type);
void FlushResults();
bool IsFlushed() const;
-
+
private:
struct ActiveQuery
{
GLuint query_id;
PerfQueryGroup query_type;
};
-
+
// when testing in SMS: 64 was too small, 128 was ok
static const int PERF_QUERY_BUFFER_SIZE = 512;
-
+
void WeakFlush();
// Only use when non-empty
void FlushOne();
-
+
// This contains gl query objects with unretrieved results.
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
int m_query_read_pos;
-
+
// TODO: sloppy
volatile int m_query_count;
volatile u32 m_results[PQG_NUM_MEMBERS];