From ee3f2b8fcb953151fb6617f31d7a96bb7ebc12a5 Mon Sep 17 00:00:00 2001
From: TellowKrinkle
Date: Sun, 26 Jun 2022 01:04:53 -0500
Subject: [PATCH] VideoBackends:Metal: Implement PerfQuery

---
 .../Core/VideoBackends/Metal/MTLPerfQuery.h   | 26 +++--
 .../Core/VideoBackends/Metal/MTLPerfQuery.mm  | 86 +++++++++++++++++
 .../VideoBackends/Metal/MTLStateTracker.h     | 11 +++
 .../VideoBackends/Metal/MTLStateTracker.mm    | 94 ++++++++++++++++++-
 4 files changed, 210 insertions(+), 7 deletions(-)

diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h
index 793cf8cec5..993b92e93c 100644
--- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h
+++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h
@@ -3,6 +3,9 @@
 
 #pragma once
 
+#include <condition_variable>
+#include <mutex>
+
 #include "VideoCommon/PerfQueryBase.h"
 
 namespace Metal
@@ -10,11 +13,22 @@ namespace Metal
 class PerfQuery final : public PerfQueryBase
 {
 public:
-  void EnableQuery(PerfQueryGroup type) override {}
-  void DisableQuery(PerfQueryGroup type) override {}
-  void ResetQuery() override {}
-  u32 GetQueryResult(PerfQueryType type) override { return 0; }
-  void FlushResults() override {}
-  bool IsFlushed() const override { return true; }
+  void EnableQuery(PerfQueryGroup type) override;
+  void DisableQuery(PerfQueryGroup type) override;
+  void ResetQuery() override;
+  u32 GetQueryResult(PerfQueryType type) override;
+  void FlushResults() override;
+  bool IsFlushed() const override;
+
+  /// Notify PerfQuery of a new pending encoder
+  /// One call to ReturnResults should be made for every call to IncCount
+  void IncCount() { m_query_count.fetch_add(1, std::memory_order_relaxed); }
+  /// May be called from any thread
+  void ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, u32 query_id);
+
+private:
+  u32 m_current_query = 0;
+  std::mutex m_results_mtx;
+  std::condition_variable m_cv;
 };
 } // namespace Metal
diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
index 2892bdc747..42139e63bf 100644
--- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
+++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
@@ -2,3 +2,89 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "VideoBackends/Metal/MTLPerfQuery.h"
+
+#include "VideoBackends/Metal/MTLStateTracker.h"
+
+void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
+{
+  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
+    g_state_tracker->EnablePerfQuery(type, m_current_query);
+}
+
+void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
+{
+  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
+    g_state_tracker->DisablePerfQuery();
+}
+
+void Metal::PerfQuery::ResetQuery()
+{
+  std::lock_guard lock(m_results_mtx);
+  m_current_query++;
+  for (std::atomic<u32>& result : m_results)
+    result.store(0, std::memory_order_relaxed);
+}
+
+u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
+{
+  u32 result = 0;
+  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
+  {
+    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
+  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
+  }
+  else if (type == PQ_BLEND_INPUT)
+  {
+    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
+             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
+  }
+  else if (type == PQ_EFB_COPY_CLOCKS)
+  {
+    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
+  }
+
+  return result;
+}
+
+void Metal::PerfQuery::FlushResults()
+{
+  if (IsFlushed())
+    return;
+
+  // There's a possibility that some active performance queries are unflushed
+  g_state_tracker->FlushEncoders();
+
+  std::unique_lock lock(m_results_mtx);
+  while (!IsFlushed())
+    m_cv.wait(lock);
+}
+
+bool Metal::PerfQuery::IsFlushed() const
+{
+  return m_query_count.load(std::memory_order_acquire) == 0;
+}
+
+void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
+                                     u32 query_id)
+{
+  {
+    std::lock_guard lock(m_results_mtx);
+    if (m_current_query == query_id)
+    {
+      for (size_t i = 0; i < count; ++i)
+      {
+        u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
+                                (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
+
+        native_res_result /= g_ActiveConfig.iMultisamples;
+
+        m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
+      }
+    }
+    m_query_count.fetch_sub(1, std::memory_order_release);
+  }
+  m_cv.notify_one();
+}
diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h
index ac930767c4..1807f345e4 100644
--- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h
+++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h
@@ -17,6 +17,7 @@
 #include "VideoBackends/Metal/MTLTexture.h"
 #include "VideoBackends/Metal/MTLUtil.h"
 
+#include "VideoCommon/PerfQueryBase.h"
 #include "VideoCommon/RenderBase.h"
 
 namespace Metal
@@ -90,6 +91,8 @@ public:
   void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
   /// Use around utility draws that are commonly used immediately before gx draws to the same buffer
   void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
+  void EnablePerfQuery(PerfQueryGroup group, u32 query_id);
+  void DisablePerfQuery();
   void UnbindTexture(id<MTLTexture> texture);
 
   void Draw(u32 base_vertex, u32 num_vertices);
@@ -157,8 +160,10 @@ private:
   };
 
   struct Backref;
+  struct PerfQueryTracker;
 
   std::shared_ptr<Backref> m_backref;
+  std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
   MRCOwned<id<MTLFence>> m_fence;
   MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
   MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
@@ -224,7 +229,9 @@
     MTLDepthClipMode depth_clip_mode;
     MTLCullMode cull_mode;
     DepthStencilSelector depth_stencil;
+    PerfQueryGroup perf_query_group;
   } m_current;
+  std::shared_ptr<PerfQueryTracker> m_current_perf_query;
 
   /// Things that represent what we'd *like* to have on the encoder for the next draw
   struct State
@@ -250,8 +257,12 @@
     id<MTLBuffer> texels = nullptr;
     u32 texel_buffer_offset0;
     u32 texel_buffer_offset1;
+    PerfQueryGroup perf_query_group = static_cast<PerfQueryGroup>(-1);
   } m_state;
 
+  u32 m_perf_query_tracker_counter = 0;
+
+  std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
   void SetSamplerForce(u32 idx, const SamplerState& sampler);
   void Sync(BufferPair& buffer);
   Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm
index 8451f413da..66ae33b733 100644
--- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm
+++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm
@@ -10,6 +10,7 @@
 #include "Common/BitUtils.h"
 
 #include "VideoBackends/Metal/MTLObjectCache.h"
+#include "VideoBackends/Metal/MTLPerfQuery.h"
 #include "VideoBackends/Metal/MTLPipeline.h"
 #include "VideoBackends/Metal/MTLTexture.h"
 #include "VideoBackends/Metal/MTLUtil.h"
@@ -19,6 +20,8 @@
 #include "VideoCommon/VertexShaderManager.h"
 #include "VideoCommon/VideoConfig.h"
 
+static constexpr u32 PERF_QUERY_BUFFER_SIZE = 512;
+
 std::unique_ptr<Metal::StateTracker> Metal::g_state_tracker;
 
 struct Metal::StateTracker::Backref
@@ -28,6 +31,14 @@
   explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
 };
 
+struct Metal::StateTracker::PerfQueryTracker
+{
+  MRCOwned<id<MTLBuffer>> buffer;
+  const u64* contents;
+  std::vector<PerfQueryGroup> groups;
+  u32 query_id;
+};
+
 static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
 {
   // clang-format off
@@ -328,8 +339,12 @@ void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action)
 void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
 {
   EndRenderPass();
+  if (m_current_perf_query)
+    [descriptor setVisibilityResultBuffer:m_current_perf_query->buffer];
   m_current_render_encoder =
       MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
+  if (m_current_perf_query)
+    [descriptor setVisibilityResultBuffer:nil];
   if (!g_features.unified_memory)
     [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
   AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
@@ -347,6 +362,7 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
   m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always);
   m_current.depth_clip_mode = MTLDepthClipModeClip;
   m_current.cull_mode = MTLCullModeNone;
+  m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
   m_flags.NewEncoder();
   m_dirty_samplers = 0xff;
   m_dirty_textures = 0xff;
@@ -411,15 +427,23 @@
     m_texture_upload_cmdbuf = nullptr;
   }
   [m_current_render_cmdbuf
-      addCompletedHandler:[backref = m_backref, draw = m_current_draw](id<MTLCommandBuffer> buf) {
+      addCompletedHandler:[backref = m_backref, draw = m_current_draw,
+                           q = std::move(m_current_perf_query)](id<MTLCommandBuffer> buf) {
         std::lock_guard guard(backref->mtx);
         if (StateTracker* tracker = backref->state_tracker)
         {
           // We can do the update non-atomically because we only ever update under the lock
           u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed));
           tracker->m_last_finished_draw.store(newval, std::memory_order_release);
+          if (q)
+          {
+            if (PerfQuery* query = static_cast<PerfQuery*>(g_perf_query.get()))
+              query->ReturnResults(q->contents, q->groups.data(), q->groups.size(), q->query_id);
+            tracker->m_perf_query_tracker_cache.emplace_back(std::move(q));
+          }
         }
       }];
+  m_current_perf_query = nullptr;
   [m_current_render_cmdbuf commit];
   m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
   m_current_render_cmdbuf = nullptr;
@@ -603,6 +627,57 @@ void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u3
   }
 }
 
+std::shared_ptr<Metal::StateTracker::PerfQueryTracker> Metal::StateTracker::NewPerfQueryTracker()
+{
+  static_cast<PerfQuery*>(g_perf_query.get())->IncCount();
+  // The cache is repopulated asynchronously
+  std::lock_guard lock(m_backref->mtx);
+  if (m_perf_query_tracker_cache.empty())
+  {
+    // Make a new one
+    @autoreleasepool
+    {
+      std::shared_ptr<PerfQueryTracker> tracker = std::make_shared<PerfQueryTracker>();
+      const MTLResourceOptions options =
+          MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
+      id<MTLBuffer> buffer = [g_device newBufferWithLength:PERF_QUERY_BUFFER_SIZE * sizeof(u64)
+                                                   options:options];
+      [buffer setLabel:[NSString stringWithFormat:@"PerfQuery Buffer %d",
+                                                  m_perf_query_tracker_counter++]];
+      tracker->buffer = MRCTransfer(buffer);
+      tracker->contents = static_cast<const u64*>([buffer contents]);
+      return tracker;
+    }
+  }
+  else
+  {
+    // Reuse an old one
+    std::shared_ptr<PerfQueryTracker> tracker = std::move(m_perf_query_tracker_cache.back());
+    m_perf_query_tracker_cache.pop_back();
+    return tracker;
+  }
+}
+
+void Metal::StateTracker::EnablePerfQuery(PerfQueryGroup group, u32 query_id)
+{
+  m_state.perf_query_group = group;
+  if (!m_current_perf_query || m_current_perf_query->query_id != query_id ||
+      m_current_perf_query->groups.size() == PERF_QUERY_BUFFER_SIZE)
+  {
+    if (m_current_render_encoder)
+      EndRenderPass();
+    if (!m_current_perf_query)
+      m_current_perf_query = NewPerfQueryTracker();
+    m_current_perf_query->groups.clear();
+    m_current_perf_query->query_id = query_id;
+  }
+}
+
+void Metal::StateTracker::DisablePerfQuery()
+{
+  m_state.perf_query_group = static_cast<PerfQueryGroup>(-1);
+}
+
 // MARK: Render
 
 // clang-format off
@@ -620,6 +695,9 @@ static NSRange RangeOfBits(u32 value)
 
 void Metal::StateTracker::PrepareRender()
 {
+  // BeginRenderPass needs this
+  if (m_state.perf_query_group != static_cast<PerfQueryGroup>(-1) && !m_current_perf_query)
+    m_current_perf_query = NewPerfQueryTracker();
   if (!m_current_render_encoder)
     BeginRenderPass(MTLLoadActionLoad);
   id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
@@ -710,6 +788,20 @@
                      lodMaxClamps:m_state.sampler_max_lod.data()
                         withRange:range];
   }
+  if (m_state.perf_query_group != m_current.perf_query_group)
+  {
+    m_current.perf_query_group = m_state.perf_query_group;
+    if (m_state.perf_query_group == static_cast<PerfQueryGroup>(-1))
+    {
+      [enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
+    }
+    else
+    {
+      [enc setVisibilityResultMode:MTLVisibilityResultModeCounting
+                            offset:m_current_perf_query->groups.size() * 8];
+      m_current_perf_query->groups.push_back(m_state.perf_query_group);
+    }
+  }
   if (is_gx)
   {
     // GX draw
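
Note (not part of the patch): the implementation above is built on Metal's visibility result mechanism. A render pass is pointed at a result buffer via MTLRenderPassDescriptor's visibilityResultBuffer, and while MTLVisibilityResultModeCounting is enabled the GPU accumulates the number of samples that pass the depth/stencil test into the 8-byte slot selected by the offset given to setVisibilityResultMode:offset:. A minimal sketch of that pattern follows; the function name and the synchronous waitUntilCompleted readback are illustrative only, since the patch instead reads the buffer from an addCompletedHandler: block and hands the counts to PerfQuery::ReturnResults.

#import <Metal/Metal.h>

// Hypothetical helper, not from the Dolphin source: counts depth/stencil-test passes for
// whatever encode_draw() records, using a single 8-byte visibility result slot.
static uint64_t CountSamplesPassingDepthTest(id<MTLDevice> device, id<MTLCommandQueue> queue,
                                             MTLRenderPassDescriptor* desc,
                                             void (^encode_draw)(id<MTLRenderCommandEncoder> enc))
{
  // One u64 slot; a larger buffer (PERF_QUERY_BUFFER_SIZE slots in the patch) just means
  // passing offset = slot_index * 8 for each group of draws.
  id<MTLBuffer> results = [device newBufferWithLength:sizeof(uint64_t)
                                              options:MTLResourceStorageModeShared];
  desc.visibilityResultBuffer = results;

  id<MTLCommandBuffer> cmdbuf = [queue commandBuffer];
  id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:desc];
  [enc setVisibilityResultMode:MTLVisibilityResultModeCounting offset:0];
  encode_draw(enc);
  [enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
  [enc endEncoding];

  [cmdbuf commit];
  [cmdbuf waitUntilCompleted];  // the patch avoids this stall via addCompletedHandler:
  return static_cast<const uint64_t*>([results contents])[0];
}

ReturnResults then rescales each returned count from the EFB target resolution back to native resolution and divides by the MSAA sample count before accumulating it into m_results, which is what GetQueryResult reports per PerfQueryGroup.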