GS: Compute SW CPU per-thread not per-draw

This commit is contained in:
Connor McLaughlin 2022-03-12 14:53:47 +10:00 committed by refractionpcsx2
parent c6ce380042
commit 206f80c5f4
13 changed files with 104 additions and 109 deletions

View File

@ -592,6 +592,15 @@ static void DrawPerformanceOverlay()
PerformanceMetrics::GetGSThreadAverageTime());
DRAW_LINE(s_fixed_font, text.c_str(), IM_COL32(255, 255, 255, 255));
const u32 gs_sw_threads = PerformanceMetrics::GetGSSWThreadCount();
for (u32 i = 0; i < gs_sw_threads; i++)
{
text.Clear();
text.Write("SW-%u: %.1f%% (%.2fms)", i, PerformanceMetrics::GetGSSWThreadUsage(i),
PerformanceMetrics::GetGSSWThreadAverageTime(i));
DRAW_LINE(s_fixed_font, text.c_str(), IM_COL32(255, 255, 255, 255));
}
if (THREAD_VU1)
{
text.Clear();

View File

@ -657,21 +657,16 @@ void GSgetStats(std::string& info)
if (GSConfig.Renderer == GSRendererType::SW)
{
float sum = 0.0f;
for (int i = GSPerfMon::WorkerDraw0; i < GSPerfMon::TimerLast; i++)
sum += pm.GetTimer(static_cast<GSPerfMon::timer_t>(i));
const double fps = GetVerticalFrequency();
const double fillrate = pm.Get(GSPerfMon::Fillrate);
info = format("%s SW | %d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps | %d%% WCPU",
info = format("%s SW | %d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps",
api_name,
(int)pm.Get(GSPerfMon::SyncPoint),
(int)pm.Get(GSPerfMon::Prim),
(int)pm.Get(GSPerfMon::Draw),
pm.Get(GSPerfMon::Swizzle) / 1024,
pm.Get(GSPerfMon::Unswizzle) / 1024,
fps * fillrate / (1024 * 1024),
static_cast<int>(std::lround(sum)));
fps * fillrate / (1024 * 1024));
}
else if (GSConfig.Renderer == GSRendererType::Null)
{

View File

@ -538,7 +538,7 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
for (int i = 0; i < m_threads; i++)
{
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker(&GSPng::Process)));
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker({}, &GSPng::Process, {})));
}
m_capturing = true;

View File

@ -26,9 +26,6 @@ GSPerfMon::GSPerfMon()
{
memset(m_counters, 0, sizeof(m_counters));
memset(m_stats, 0, sizeof(m_stats));
memset(m_timer_stats, 0, sizeof(m_timer_stats));
memset(m_total, 0, sizeof(m_total));
memset(m_begin, 0, sizeof(m_begin));
}
void GSPerfMon::EndFrame()
@ -39,7 +36,6 @@ void GSPerfMon::EndFrame()
void GSPerfMon::Update()
{
#ifndef DISABLE_PERF_MON
if (m_count > 0)
{
for (size_t i = 0; i < std::size(m_counters); i++)
@ -48,55 +44,7 @@ void GSPerfMon::Update()
}
m_count = 0;
// Update CPU usage for SW renderer.
if (GSConfig.Renderer == GSRendererType::SW)
{
const u64 current = __rdtsc();
for (size_t i = WorkerDraw0; i < TimerLast; i++)
{
if (m_begin[i] == 0)
{
m_timer_stats[i] = 0.0f;
continue;
}
m_timer_stats[i] =
static_cast<float>(static_cast<double>(m_total[i]) / static_cast<double>(current - m_begin[i])
* 100.0);
m_begin[i] = 0;
m_start[i] = 0;
m_total[i] = 0;
}
}
}
memset(m_counters, 0, sizeof(m_counters));
#endif
}
void GSPerfMon::Start(int timer)
{
#ifndef DISABLE_PERF_MON
m_start[timer] = __rdtsc();
if (m_begin[timer] == 0)
{
m_begin[timer] = m_start[timer];
}
#endif
}
void GSPerfMon::Stop(int timer)
{
#ifndef DISABLE_PERF_MON
if (m_start[timer] > 0)
{
m_total[timer] += __rdtsc() - m_start[timer];
m_start[timer] = 0;
}
#endif
}

View File

@ -18,14 +18,6 @@
class GSPerfMon
{
public:
enum timer_t
{
Main,
Sync,
WorkerDraw0,
TimerLast = WorkerDraw0 + 32, // Enough space for 32 GS worker threads
};
enum counter_t
{
Prim,
@ -47,15 +39,11 @@ public:
protected:
double m_counters[CounterLast];
double m_stats[CounterLast];
float m_timer_stats[TimerLast];
u64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
u64 m_frame;
clock_t m_lastframe;
int m_count;
int m_disp_fb_sprite_blits;
friend class GSPerfMonAutoTimer;
public:
GSPerfMon();
@ -65,12 +53,8 @@ public:
void Put(counter_t c, double val = 0) { m_counters[c] += val; }
double Get(counter_t c) { return m_stats[c]; }
float GetTimer(timer_t t) { return m_timer_stats[t]; }
void Update();
void Start(int timer = Main);
void Stop(int timer = Main);
__fi void AddDisplayFramebufferSpriteBlit() { m_disp_fb_sprite_blits++; }
__fi int GetDisplayFramebufferSpriteBlits()
{
@ -80,18 +64,4 @@ public:
}
};
// Scope guard around GSPerfMon::Start/Stop: construction calls Start() on the
// chosen timer slot, destruction calls Stop() on the same slot.
class GSPerfMonAutoTimer
{
	GSPerfMon* m_pm; // monitor that receives the Start()/Stop() calls
	int m_timer;     // timer slot passed to both calls

public:
	GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main)
		: m_pm(pm)
		, m_timer(timer)
	{
		m_pm->Start(m_timer);
	}

	~GSPerfMonAutoTimer()
	{
		m_pm->Stop(m_timer);
	}
};
extern GSPerfMon g_perfmon;

View File

@ -1965,8 +1965,6 @@ void GSState::SoftReset(u32 mask)
void GSState::ReadFIFO(u8* mem, int size)
{
GSPerfMonAutoTimer pmat(&g_perfmon);
Flush();
size *= 16;
@ -1985,8 +1983,6 @@ template void GSState::Transfer<3>(const u8* mem, u32 size);
template <int index>
void GSState::Transfer(const u8* mem, u32 size)
{
GSPerfMonAutoTimer pmat(&g_perfmon);
const u8* start = mem;
GIFPath& path = m_path[index];

View File

@ -27,7 +27,9 @@ class GSJobQueue final
{
private:
std::thread m_thread;
std::function<void()> m_startup;
std::function<void(T&)> m_func;
std::function<void()> m_shutdown;
bool m_exit;
ringbuffer_base<T, CAPACITY> m_queue;
@ -38,6 +40,9 @@ private:
void ThreadProc()
{
if (m_startup)
m_startup();
std::unique_lock<std::mutex> l(m_lock);
while (true)
@ -74,11 +79,16 @@ private:
l.lock();
}
if (m_shutdown)
m_shutdown();
}
public:
GSJobQueue(std::function<void(T&)> func)
: m_func(func)
GSJobQueue(std::function<void()> startup, std::function<void(T&)> func, std::function<void()> shutdown)
: m_startup(std::move(startup))
, m_func(std::move(func))
, m_shutdown(std::move(shutdown))
, m_exit(false)
{
m_thread = std::thread(&GSJobQueue::ThreadProc, this);

View File

@ -416,8 +416,6 @@ static GSVector4 CalculateDrawRect(s32 window_width, s32 window_height, s32 text
void GSRenderer::VSync(u32 field, bool registers_written)
{
GSPerfMonAutoTimer pmat(&g_perfmon);
Flush();
if (s_dump && s_n >= s_saven)

View File

@ -18,6 +18,11 @@
#include "PrecompiledHeader.h"
#include "GSRasterizer.h"
#include "GS/GSExtra.h"
#include "PerformanceMetrics.h"
#include "common/StringUtil.h"
#include "common/PersistentThread.h"
#define ENABLE_DRAW_STATS 0
int GSRasterizerData::s_counter = 0;
@ -128,8 +133,6 @@ int GSRasterizer::GetPixels(bool reset)
void GSRasterizer::Draw(GSRasterizerData* data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if (data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0)
return;
@ -137,7 +140,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
m_pixels.total = 0;
m_primcount = 0;
data->start = __rdtsc();
if constexpr (ENABLE_DRAW_STATS)
data->start = __rdtsc();
m_ds->BeginDraw(data);
@ -244,11 +248,10 @@ void GSRasterizer::Draw(GSRasterizerData* data)
data->pixels = m_pixels.actual;
u64 ticks = __rdtsc() - data->start;
m_pixels.sum += m_pixels.actual;
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total, m_primcount);
if constexpr (ENABLE_DRAW_STATS)
m_ds->EndDraw(data->frame, __rdtsc() - data->start, m_pixels.actual, m_pixels.total, m_primcount);
}
template <bool scissor_test>
@ -1190,13 +1193,27 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
{
m_scanline[i] = static_cast<u8>(i % threads);
}
PerformanceMetrics::SetGSSWThreadCount(threads);
}
// Tears down the rasterizer list: resets the published GS software thread
// count to zero, then releases the m_scanline buffer.
// NOTE(review): the worker shutdown hook (OnWorkerShutdown) writes slot i via
// PerformanceMetrics::SetGSSWThreadTimer. If the workers are only joined after
// this body has run, that write would target an already-emptied list -
// confirm the teardown order.
GSRasterizerList::~GSRasterizerList()
{
PerformanceMetrics::SetGSSWThreadCount(0);
_aligned_free(m_scanline);
}
// Startup hook for worker i; it is invoked through the startup callback given
// to the worker in Create. Names the calling thread "GS-SW-<i>" and registers
// the calling thread's CPU timer in slot i of the performance metrics.
void GSRasterizerList::OnWorkerStartup(int i)
{
	const auto thread_name = StringUtil::StdStringFromFormat("GS-SW-%d", i);
	Threading::SetNameOfCurrentThread(thread_name.c_str());

	PerformanceMetrics::SetGSSWThreadTimer(i, Common::ThreadCPUTimer::GetForCallingThread());
}
// Shutdown hook for worker i; it is invoked through the shutdown callback
// given to the worker in Create. Replaces the timer stored in slot i of the
// performance metrics with a default-constructed one.
void GSRasterizerList::OnWorkerShutdown(int i)
{
PerformanceMetrics::SetGSSWThreadTimer(i, Common::ThreadCPUTimer());
}
void GSRasterizerList::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data)
{
GSVector4i r = data->bbox.rintersect(data->scissor);

View File

@ -197,6 +197,9 @@ protected:
GSRasterizerList(int threads, GSPerfMon* perfmon);
void OnWorkerStartup(int i);
void OnWorkerShutdown(int i);
public:
virtual ~GSRasterizerList();
@ -217,7 +220,9 @@ public:
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(new DS(), i, threads, perfmon)));
auto& r = *rl->m_r[i];
rl->m_workers.push_back(std::unique_ptr<GSWorker>(new GSWorker(
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(item.get()); })));
[rl, i]() { rl->OnWorkerStartup(i); },
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(item.get()); },
[rl, i]() { rl->OnWorkerShutdown(i); })));
}
return rl;

View File

@ -582,9 +582,7 @@ void GSRendererSW::Sync(int reason)
{
//printf("sync %d\n", reason);
GSPerfMonAutoTimer pmat(&g_perfmon, GSPerfMon::Sync);
u64 t = __rdtsc();
u64 t = LOG ? __rdtsc() : 0;
m_rl->Sync();
@ -607,7 +605,7 @@ void GSRendererSW::Sync(int reason)
}
}
t = __rdtsc() - t;
t = LOG ? (__rdtsc() - t) : 0;
int pixels = m_rl->GetPixels();

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include <chrono>
#include <vector>
#include "PerformanceMetrics.h"
#include "System.h"
@ -57,6 +58,14 @@ static float s_gs_thread_time = 0.0f;
static float s_vu_thread_usage = 0.0f;
static float s_vu_thread_time = 0.0f;
// Per-thread bookkeeping for one GS software-renderer worker.
struct GSSWThreadStats
{
	// CPU timer registered for the worker; sampled and reset in Update().
	Common::ThreadCPUTimer timer;

	// Usage value written by the timer during the last Update().
	double usage{0.0};

	// Time in milliseconds from the last Update(), divided by the number of
	// frames since the previous update.
	double time{0.0};
};
// One entry per GS software-renderer worker thread; sized by SetGSSWThreadCount().
// Declared static so it has internal linkage, matching the other s_* state in this file.
static std::vector<GSSWThreadStats> s_gs_sw_threads;
void PerformanceMetrics::Clear()
{
Reset();
@ -139,6 +148,12 @@ void PerformanceMetrics::Update(bool gs_register_write, bool fb_blit)
s_cpu_thread_timer.GetUsageInMillisecondsAndReset(ticks_diff, &s_cpu_thread_time, &s_cpu_thread_usage);
s_cpu_thread_time /= static_cast<double>(s_frames_since_last_update);
for (GSSWThreadStats& thread : s_gs_sw_threads)
{
thread.timer.GetUsageInMillisecondsAndReset(ticks_diff, &thread.time, &thread.usage);
thread.time /= static_cast<double>(s_frames_since_last_update);
}
const u64 gs_time = GetMTGS().GetCpuTime();
const u64 vu_time = THREAD_VU1 ? vu1Thread.GetCpuTime() : 0;
const u64 ticks = GetCPUTicks();
@ -171,6 +186,17 @@ void PerformanceMetrics::SetCPUThreadTimer(Common::ThreadCPUTimer timer)
s_cpu_thread_timer = std::move(timer);
}
// Discards all existing GS software thread entries and replaces them with
// `count` freshly default-initialized ones.
void PerformanceMetrics::SetGSSWThreadCount(u32 count)
{
	s_gs_sw_threads = std::vector<GSSWThreadStats>(count);
}
// Stores the CPU timer for the GS software thread at `index`.
// Requests for an index outside the current thread count are ignored. The
// rasterizer list resets the count to zero in its destructor, and a worker's
// shutdown hook may still call in with its old index afterwards; indexing
// the vector unchecked in that case would write out of bounds.
void PerformanceMetrics::SetGSSWThreadTimer(u32 index, Common::ThreadCPUTimer timer)
{
	if (index >= s_gs_sw_threads.size())
		return;

	s_gs_sw_threads[index].timer = std::move(timer);
}
void PerformanceMetrics::SetVerticalFrequency(float rate)
{
s_vertical_frequency = rate;
@ -245,3 +271,18 @@ float PerformanceMetrics::GetVUThreadAverageTime()
{
return s_vu_thread_time;
}
// Reports how many GS software thread entries are currently tracked.
u32 PerformanceMetrics::GetGSSWThreadCount()
{
	const auto slot_count = s_gs_sw_threads.size();
	return static_cast<u32>(slot_count);
}
// Returns the usage value last stored for the GS software thread at `index`.
// The index is not range-checked here.
double PerformanceMetrics::GetGSSWThreadUsage(u32 index)
{
	const GSSWThreadStats& stats = s_gs_sw_threads[index];
	return stats.usage;
}
// Returns the per-frame time last stored for the GS software thread at `index`.
// The index is not range-checked here.
double PerformanceMetrics::GetGSSWThreadAverageTime(u32 index)
{
	const GSSWThreadStats& stats = s_gs_sw_threads[index];
	return stats.time;
}

View File

@ -32,6 +32,10 @@ namespace PerformanceMetrics
/// Sets the EE thread for CPU usage calculations.
void SetCPUThreadTimer(Common::ThreadCPUTimer timer);
/// Sets timers for GS software threads.
void SetGSSWThreadCount(u32 count);
void SetGSSWThreadTimer(u32 index, Common::ThreadCPUTimer timer);
/// Sets the vertical frequency, used in speed calculations.
void SetVerticalFrequency(float rate);
@ -52,4 +56,8 @@ namespace PerformanceMetrics
float GetGSThreadAverageTime();
float GetVUThreadUsage();
float GetVUThreadAverageTime();
u32 GetGSSWThreadCount();
double GetGSSWThreadUsage(u32 index);
double GetGSSWThreadAverageTime(u32 index);
} // namespace PerformanceMetrics