dolphin/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm

// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/Metal/MTLPerfQuery.h"

#include "VideoBackends/Metal/MTLStateTracker.h"

void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
{
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
    g_state_tracker->EnablePerfQuery(type, m_current_query);
}

void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
{
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
    g_state_tracker->DisablePerfQuery();
}

void Metal::PerfQuery::ResetQuery()
{
  std::lock_guard<std::mutex> lock(m_results_mtx);
  m_current_query++;
  for (std::atomic<u32>& result : m_results)
    result.store(0, std::memory_order_relaxed);
}

u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
  u32 result = 0;
  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
  {
    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
  }
  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
  {
    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
  }
  else if (type == PQ_BLEND_INPUT)
  {
    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
  }
  else if (type == PQ_EFB_COPY_CLOCKS)
  {
    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
  }

  return result;
}

void Metal::PerfQuery::FlushResults()
{
  if (IsFlushed())
    return;

  // There's a possibility that some active performance queries are unflushed
  g_state_tracker->FlushEncoders();
  g_state_tracker->NotifyOfCPUGPUSync();

  std::unique_lock<std::mutex> lock(m_results_mtx);
  while (!IsFlushed())
    m_cv.wait(lock);
}

bool Metal::PerfQuery::IsFlushed() const
{
  return m_query_count.load(std::memory_order_acquire) == 0;
}

void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
                                     u32 query_id)
{
  {
    std::lock_guard<std::mutex> lock(m_results_mtx);
    if (m_current_query == query_id)
    {
      for (size_t i = 0; i < count; ++i)
      {
        u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
                                (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());

        native_res_result /= g_ActiveConfig.iMultisamples;

        m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
      }
    }
    m_query_count.fetch_sub(1, std::memory_order_release);
  }
  m_cv.notify_one();
}
VideoBackends: Add Metal renderer 2022-06-01 09:58:13 +00:00			`// Copyright 2022 Dolphin Emulator Project`
			`// SPDX-License-Identifier: GPL-2.0-or-later`

			`#include "VideoBackends/Metal/MTLPerfQuery.h"`
VideoBackends:Metal: Implement PerfQuery 2022-06-26 06:04:53 +00:00
			`#include "VideoBackends/Metal/MTLStateTracker.h"`

			`void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)`
			`{`
			`if (type == PQG_ZCOMP_ZCOMPLOC \|\| type == PQG_ZCOMP)`
			`g_state_tracker->EnablePerfQuery(type, m_current_query);`
			`}`

			`void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)`
			`{`
			`if (type == PQG_ZCOMP_ZCOMPLOC \|\| type == PQG_ZCOMP)`
			`g_state_tracker->DisablePerfQuery();`
			`}`

			`void Metal::PerfQuery::ResetQuery()`
			`{`
			`std::lock_guard<std::mutex> lock(m_results_mtx);`
			`m_current_query++;`
			`for (std::atomic<u32>& result : m_results)`
			`result.store(0, std::memory_order_relaxed);`
			`}`

			`u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)`
			`{`
			`u32 result = 0;`
			`if (type == PQ_ZCOMP_INPUT_ZCOMPLOC \|\| type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)`
			`{`
			`result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);`
			`}`
			`else if (type == PQ_ZCOMP_INPUT \|\| type == PQ_ZCOMP_OUTPUT)`
			`{`
			`result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);`
			`}`
			`else if (type == PQ_BLEND_INPUT)`
			`{`
			`result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +`
			`m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);`
			`}`
			`else if (type == PQ_EFB_COPY_CLOCKS)`
			`{`
			`result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);`
			`}`

			`return result;`
			`}`

			`void Metal::PerfQuery::FlushResults()`
			`{`
			`if (IsFlushed())`
			`return;`

			`// There's a possibility that some active performance queries are unflushed`
			`g_state_tracker->FlushEncoders();`
VideoBackends:Metal: Bring back unified memory config Turns out it was helpful. (Most improvement in ubershaders.) This time with much better auto mode. 2022-07-23 22:27:24 +00:00			`g_state_tracker->NotifyOfCPUGPUSync();`
VideoBackends:Metal: Implement PerfQuery 2022-06-26 06:04:53 +00:00
			`std::unique_lock<std::mutex> lock(m_results_mtx);`
			`while (!IsFlushed())`
			`m_cv.wait(lock);`
			`}`

			`bool Metal::PerfQuery::IsFlushed() const`
			`{`
			`return m_query_count.load(std::memory_order_acquire) == 0;`
			`}`

			`void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,`
			`u32 query_id)`
			`{`
			`{`
			`std::lock_guard<std::mutex> lock(m_results_mtx);`
			`if (m_current_query == query_id)`
			`{`
			`for (size_t i = 0; i < count; ++i)`
			`{`
			`u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /`
			`(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());`

			`native_res_result /= g_ActiveConfig.iMultisamples;`

			`m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);`
			`}`
			`}`
			`m_query_count.fetch_sub(1, std::memory_order_release);`
			`}`
			`m_cv.notify_one();`
			`}`