diff --git a/Source/Core/VideoCommon/Src/RenderBase.h b/Source/Core/VideoCommon/Src/RenderBase.h index 7f2853bcc3..55678f3f5a 100644 --- a/Source/Core/VideoCommon/Src/RenderBase.h +++ b/Source/Core/VideoCommon/Src/RenderBase.h @@ -128,11 +128,6 @@ public: static unsigned int GetPrevPixelFormat() { return prev_efb_format; } static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; } - virtual void ResetPixelPerf() {}; - virtual void ResumePixelPerf(bool efb_copies) {}; - virtual void PausePixelPerf(bool efb_copies) {}; - virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; }; - // TODO: doesn't belong here virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0; virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0; diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 86b2e03221..330b23d370 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -101,6 +101,9 @@ Base + + Base + Base @@ -113,8 +116,6 @@ Shader Generators - - Base Util @@ -239,6 +240,9 @@ Base + + Base + Base @@ -251,8 +255,6 @@ Shader Generators - - Base Util diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj index 52d1c37aed..aab9345ef7 100644 --- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj +++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj @@ -199,6 +199,7 @@ + @@ -228,6 +229,7 @@ + diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters index 6492e887ca..4b8efac92b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters +++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters @@ -57,6 +57,9 @@ Render + + Render + @@ -117,6 +120,9 @@ Render + + Render + diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp new file mode 100644 index 0000000000..6ab91fed27 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp @@ -0,0 +1,148 @@ +#include "RenderBase.h" + +#include "D3DBase.h" +#include "PerfQuery.h" + +namespace DX11 { + +PerfQuery::PerfQuery() + : m_query_read_pos() + , m_query_count() +{ + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + { + D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0); + D3D::device->CreateQuery(&qdesc, &m_query_buffer[i].query); + } + ResetQuery(); +} + +PerfQuery::~PerfQuery() +{ + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + { + // TODO: EndQuery? + m_query_buffer[i].query->Release(); + } +} + +void PerfQuery::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? + if (m_query_count > ARRAYSIZE(m_query_buffer) / 2) + WeakFlush(); + + if (ARRAYSIZE(m_query_buffer) == m_query_count) + { + // TODO + FlushOne(); + ERROR_LOG(VIDEO, "flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer)]; + + D3D::context->Begin(entry.query); + entry.query_type = type; + + ++m_query_count; + } +} + +void PerfQuery::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count + ARRAYSIZE(m_query_buffer)-1) % ARRAYSIZE(m_query_buffer)]; + D3D::context->End(entry.query); + } +} + +void PerfQuery::ResetQuery() +{ + m_query_count = 0; + std::fill_n(m_results, ARRAYSIZE(m_results), 0); +} + +u32 PerfQuery::GetQueryResult(PerfQueryType type) +{ + u32 result = 0; + + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) + { + result = m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) + { + result = m_results[PQG_ZCOMP]; + } + else if (type == PQ_BLEND_INPUT) + { + result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_EFB_COPY_CLOCKS) + { + result = m_results[PQG_EFB_COPY_CLOCKS]; + } + + return result / 4; +} + +void PerfQuery::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + UINT64 result = 0; + HRESULT hr = S_FALSE; + while (hr != S_OK) + { + // TODO: Might cause us to be stuck in an infinite loop! + hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0); + } + + m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQuery::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + +void PerfQuery::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + UINT64 result = 0; + HRESULT hr = D3D::context->GetData(entry.query, &result, sizeof(result), D3D11_ASYNC_GETDATA_DONOTFLUSH); + + if (hr == S_OK) + { + m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); + --m_query_count; + } + else + { + break; + } + } +} + +bool PerfQuery::IsFlushed() const +{ + return 0 == m_query_count; +} + + +} // namespace diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h new file mode 100644 index 0000000000..b3709d1013 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h @@ -0,0 +1,46 @@ +#ifndef _PERFQUERY_H_ +#define _PERFQUERY_H_ + +#include "PerfQueryBase.h" + +namespace DX11 { + +class PerfQuery : public PerfQueryBase +{ +public: + PerfQuery(); + ~PerfQuery(); + + void EnableQuery(PerfQueryGroup type); + void DisableQuery(PerfQueryGroup type); + void ResetQuery(); + u32 GetQueryResult(PerfQueryType type); + void FlushResults(); + bool IsFlushed() const; + +private: + struct ActiveQuery + { + ID3D11Query* query; + PerfQueryGroup query_type; + }; + + void WeakFlush(); + + // Only use when non-empty + void FlushOne(); + + // when testing in SMS: 64 was too small, 128 was ok + static const int PERF_QUERY_BUFFER_SIZE = 512; + + ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE]; + int m_query_read_pos; + + // TODO: sloppy + volatile int m_query_count; + volatile u32 m_results[PQG_NUM_MEMBERS]; +}; + +} // namespace + +#endif // _PERFQUERY_H_ diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp index 882d20f8e6..2dcfcd041c 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp @@ -65,18 +65,6 @@ ID3D11RasterizerState* resetraststate = NULL; static ID3D11Texture2D* s_screenshot_texture = NULL; -// Using a vector of query objects to avoid flushing the gpu pipeline all the time -// TODO: Could probably optimized further by using a ring buffer or something -#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess -std::vector pixel_perf_queries; -static int pixel_perf_query_index = 0; - -static u64 pixel_perf = 0; -static bool pixel_perf_active = false; -static bool pixel_perf_dirty = false; - -ID3D11Query* gpu_finished_query = NULL; - // GX pipeline state struct @@ -170,9 +158,6 @@ void SetupDeviceObjects() D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState"); s_screenshot_texture = NULL; - - D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0); - D3D::device->CreateQuery(&qdesc, &gpu_finished_query); } // Kill off all device objects @@ -180,12 +165,6 @@ void TeardownDeviceObjects() { delete g_framebuffer_manager; - while (!pixel_perf_queries.empty()) - { - SAFE_RELEASE(pixel_perf_queries.back()); - pixel_perf_queries.pop_back(); - } - SAFE_RELEASE(gpu_finished_query); SAFE_RELEASE(access_efb_cbuf); SAFE_RELEASE(clearblendstates[0]); SAFE_RELEASE(clearblendstates[1]); @@ -232,11 +211,6 @@ Renderer::Renderer() s_LastEFBScale = g_ActiveConfig.iEFBScale; CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); - pixel_perf_query_index = 0; - pixel_perf = 0; - pixel_perf_active = false; - pixel_perf_dirty = false; - SetupDeviceObjects(); @@ -660,112 +634,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); } -void Renderer::ResetPixelPerf() -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if (pixel_perf_active) - PausePixelPerf(false); - - pixel_perf_query_index = 0; - pixel_perf = 0; -} - -void Renderer::ResumePixelPerf(bool efb_copies) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if (efb_copies) - return; - - if(pixel_perf_active) - return; - - if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES) - { - D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0); - ID3D11Query* tmpquery = NULL; - D3D::device->CreateQuery(&qdesc, &tmpquery); - pixel_perf_queries.push_back(tmpquery); - pixel_perf_query_index = pixel_perf_queries.size() - 1; - } - else if (pixel_perf_queries.size() < pixel_perf_query_index+1) - { - StorePixelPerfResult(PP_ZCOMP_OUTPUT); - pixel_perf_query_index = 0; - } - // This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output. - D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]); - pixel_perf_active = true; - pixel_perf_dirty = true; -} - -void Renderer::PausePixelPerf(bool efb_copies) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if(!pixel_perf_active) - return; - - D3D::context->End(pixel_perf_queries[pixel_perf_query_index]); - pixel_perf_query_index++; - pixel_perf_active = false; -} - -void Renderer::StorePixelPerfResult(PixelPerfQuery type) -{ - // First, make sure the GPU has finished rendering so that query results are valid - D3D::context->End(gpu_finished_query); - BOOL gpu_finished = FALSE; - while (!gpu_finished) - { - // If nothing goes horribly wrong here, this should complete in finite time... - D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0); - } - - for(int i = 0; i < pixel_perf_query_index; ++i) - { - UINT64 buf = 0; - D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0); - - // Reported pixel metrics should be referenced to native resolution: - pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight(); - } - pixel_perf_dirty = false; -} - -u32 Renderer::GetPixelPerfResult(PixelPerfQuery type) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return 0; - - if (type == PP_EFB_COPY_CLOCKS) - { - // not implemented - return 0; - } - - if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L || - type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H || - type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L || - type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H) - { - // return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT - // TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers. - return 0; - } - - // Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway - if (pixel_perf_dirty) - StorePixelPerfResult(PP_ZCOMP_OUTPUT); - - // Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels - return std::min(pixel_perf / 4, (u64)0xFFFFFFFF); -} - void SetSrcBlend(D3D11_BLEND val) { // Colors should blend against SRC_ALPHA diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.h b/Source/Plugins/Plugin_VideoDX11/Src/Render.h index 6db829c205..8f6c78fae1 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.h @@ -46,12 +46,6 @@ public: void ReinterpretPixelData(unsigned int convtype); - void ResetPixelPerf(); - void ResumePixelPerf(bool efb_copies); - void PausePixelPerf(bool efb_copies); - u32 GetPixelPerfResult(PixelPerfQuery type); - void StorePixelPerfResult(PixelPerfQuery type); // internal - void UpdateViewport(Matrix44& vpCorrection); bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 8137e1a39f..6991b11690 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -274,9 +274,9 @@ void VertexManager::vFlush() g_nativeVertexFmt->SetupVertexPointers(); g_renderer->ApplyState(useDstAlpha); - g_renderer->ResumePixelPerf(false); + g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); Draw(stride); - g_renderer->PausePixelPerf(false); + g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index b7dd9101d3..af4d57dbf9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -42,6 +42,7 @@ #include "D3DUtil.h" #include "D3DBase.h" +#include "PerfQuery.h" #include "PixelShaderCache.h" #include "TextureCache.h" #include "VertexManager.h" @@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare() g_renderer = new Renderer; g_texture_cache = new TextureCache; g_vertex_manager = new VertexManager; + g_perf_query = new PerfQuery; VertexShaderCache::Init(); PixelShaderCache::Init(); D3D::InitUtils(); @@ -227,6 +229,7 @@ void VideoBackend::Shutdown() D3D::ShutdownUtils(); PixelShaderCache::Shutdown(); VertexShaderCache::Shutdown(); + delete g_perf_query; delete g_vertex_manager; delete g_texture_cache; delete g_renderer; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h index 76040272e3..34c64e43a1 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h @@ -17,25 +17,25 @@ public: u32 GetQueryResult(PerfQueryType type); void FlushResults(); bool IsFlushed() const; - + private: struct ActiveQuery { GLuint query_id; PerfQueryGroup query_type; }; - + // when testing in SMS: 64 was too small, 128 was ok static const int PERF_QUERY_BUFFER_SIZE = 512; - + void WeakFlush(); // Only use when non-empty void FlushOne(); - + // This contains gl query objects with unretrieved results. ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE]; int m_query_read_pos; - + // TODO: sloppy volatile int m_query_count; volatile u32 m_results[PQG_NUM_MEMBERS];