mirror of https://github.com/PCSX2/pcsx2.git
GS: Compute SW CPU per-thread not per-draw
This commit is contained in:
parent
c6ce380042
commit
206f80c5f4
|
@ -592,6 +592,15 @@ static void DrawPerformanceOverlay()
|
|||
PerformanceMetrics::GetGSThreadAverageTime());
|
||||
DRAW_LINE(s_fixed_font, text.c_str(), IM_COL32(255, 255, 255, 255));
|
||||
|
||||
const u32 gs_sw_threads = PerformanceMetrics::GetGSSWThreadCount();
|
||||
for (u32 i = 0; i < gs_sw_threads; i++)
|
||||
{
|
||||
text.Clear();
|
||||
text.Write("SW-%u: %.1f%% (%.2fms)", i, PerformanceMetrics::GetGSSWThreadUsage(i),
|
||||
PerformanceMetrics::GetGSSWThreadAverageTime(i));
|
||||
DRAW_LINE(s_fixed_font, text.c_str(), IM_COL32(255, 255, 255, 255));
|
||||
}
|
||||
|
||||
if (THREAD_VU1)
|
||||
{
|
||||
text.Clear();
|
||||
|
|
|
@ -657,21 +657,16 @@ void GSgetStats(std::string& info)
|
|||
|
||||
if (GSConfig.Renderer == GSRendererType::SW)
|
||||
{
|
||||
float sum = 0.0f;
|
||||
for (int i = GSPerfMon::WorkerDraw0; i < GSPerfMon::TimerLast; i++)
|
||||
sum += pm.GetTimer(static_cast<GSPerfMon::timer_t>(i));
|
||||
|
||||
const double fps = GetVerticalFrequency();
|
||||
const double fillrate = pm.Get(GSPerfMon::Fillrate);
|
||||
info = format("%s SW | %d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps | %d%% WCPU",
|
||||
info = format("%s SW | %d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps",
|
||||
api_name,
|
||||
(int)pm.Get(GSPerfMon::SyncPoint),
|
||||
(int)pm.Get(GSPerfMon::Prim),
|
||||
(int)pm.Get(GSPerfMon::Draw),
|
||||
pm.Get(GSPerfMon::Swizzle) / 1024,
|
||||
pm.Get(GSPerfMon::Unswizzle) / 1024,
|
||||
fps * fillrate / (1024 * 1024),
|
||||
static_cast<int>(std::lround(sum)));
|
||||
fps * fillrate / (1024 * 1024));
|
||||
}
|
||||
else if (GSConfig.Renderer == GSRendererType::Null)
|
||||
{
|
||||
|
|
|
@ -538,7 +538,7 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float
|
|||
|
||||
for (int i = 0; i < m_threads; i++)
|
||||
{
|
||||
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker(&GSPng::Process)));
|
||||
m_workers.push_back(std::unique_ptr<GSPng::Worker>(new GSPng::Worker({}, &GSPng::Process, {})));
|
||||
}
|
||||
|
||||
m_capturing = true;
|
||||
|
|
|
@ -26,9 +26,6 @@ GSPerfMon::GSPerfMon()
|
|||
{
|
||||
memset(m_counters, 0, sizeof(m_counters));
|
||||
memset(m_stats, 0, sizeof(m_stats));
|
||||
memset(m_timer_stats, 0, sizeof(m_timer_stats));
|
||||
memset(m_total, 0, sizeof(m_total));
|
||||
memset(m_begin, 0, sizeof(m_begin));
|
||||
}
|
||||
|
||||
void GSPerfMon::EndFrame()
|
||||
|
@ -39,7 +36,6 @@ void GSPerfMon::EndFrame()
|
|||
|
||||
void GSPerfMon::Update()
|
||||
{
|
||||
#ifndef DISABLE_PERF_MON
|
||||
if (m_count > 0)
|
||||
{
|
||||
for (size_t i = 0; i < std::size(m_counters); i++)
|
||||
|
@ -48,55 +44,7 @@ void GSPerfMon::Update()
|
|||
}
|
||||
|
||||
m_count = 0;
|
||||
|
||||
// Update CPU usage for SW renderer.
|
||||
if (GSConfig.Renderer == GSRendererType::SW)
|
||||
{
|
||||
const u64 current = __rdtsc();
|
||||
|
||||
for (size_t i = WorkerDraw0; i < TimerLast; i++)
|
||||
{
|
||||
if (m_begin[i] == 0)
|
||||
{
|
||||
m_timer_stats[i] = 0.0f;
|
||||
continue;
|
||||
}
|
||||
|
||||
m_timer_stats[i] =
|
||||
static_cast<float>(static_cast<double>(m_total[i]) / static_cast<double>(current - m_begin[i])
|
||||
* 100.0);
|
||||
|
||||
m_begin[i] = 0;
|
||||
m_start[i] = 0;
|
||||
m_total[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memset(m_counters, 0, sizeof(m_counters));
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSPerfMon::Start(int timer)
|
||||
{
|
||||
#ifndef DISABLE_PERF_MON
|
||||
m_start[timer] = __rdtsc();
|
||||
|
||||
if (m_begin[timer] == 0)
|
||||
{
|
||||
m_begin[timer] = m_start[timer];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSPerfMon::Stop(int timer)
|
||||
{
|
||||
#ifndef DISABLE_PERF_MON
|
||||
if (m_start[timer] > 0)
|
||||
{
|
||||
m_total[timer] += __rdtsc() - m_start[timer];
|
||||
m_start[timer] = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -18,14 +18,6 @@
|
|||
class GSPerfMon
|
||||
{
|
||||
public:
|
||||
enum timer_t
|
||||
{
|
||||
Main,
|
||||
Sync,
|
||||
WorkerDraw0,
|
||||
TimerLast = WorkerDraw0 + 32, // Enough space for 32 GS worker threads
|
||||
};
|
||||
|
||||
enum counter_t
|
||||
{
|
||||
Prim,
|
||||
|
@ -47,15 +39,11 @@ public:
|
|||
protected:
|
||||
double m_counters[CounterLast];
|
||||
double m_stats[CounterLast];
|
||||
float m_timer_stats[TimerLast];
|
||||
u64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
|
||||
u64 m_frame;
|
||||
clock_t m_lastframe;
|
||||
int m_count;
|
||||
int m_disp_fb_sprite_blits;
|
||||
|
||||
friend class GSPerfMonAutoTimer;
|
||||
|
||||
public:
|
||||
GSPerfMon();
|
||||
|
||||
|
@ -65,12 +53,8 @@ public:
|
|||
|
||||
void Put(counter_t c, double val = 0) { m_counters[c] += val; }
|
||||
double Get(counter_t c) { return m_stats[c]; }
|
||||
float GetTimer(timer_t t) { return m_timer_stats[t]; }
|
||||
void Update();
|
||||
|
||||
void Start(int timer = Main);
|
||||
void Stop(int timer = Main);
|
||||
|
||||
__fi void AddDisplayFramebufferSpriteBlit() { m_disp_fb_sprite_blits++; }
|
||||
__fi int GetDisplayFramebufferSpriteBlits()
|
||||
{
|
||||
|
@ -80,18 +64,4 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class GSPerfMonAutoTimer
|
||||
{
|
||||
GSPerfMon* m_pm;
|
||||
int m_timer;
|
||||
|
||||
public:
|
||||
GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main)
|
||||
{
|
||||
m_timer = timer;
|
||||
(m_pm = pm)->Start(m_timer);
|
||||
}
|
||||
~GSPerfMonAutoTimer() { m_pm->Stop(m_timer); }
|
||||
};
|
||||
|
||||
extern GSPerfMon g_perfmon;
|
|
@ -1965,8 +1965,6 @@ void GSState::SoftReset(u32 mask)
|
|||
|
||||
void GSState::ReadFIFO(u8* mem, int size)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(&g_perfmon);
|
||||
|
||||
Flush();
|
||||
|
||||
size *= 16;
|
||||
|
@ -1985,8 +1983,6 @@ template void GSState::Transfer<3>(const u8* mem, u32 size);
|
|||
template <int index>
|
||||
void GSState::Transfer(const u8* mem, u32 size)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(&g_perfmon);
|
||||
|
||||
const u8* start = mem;
|
||||
|
||||
GIFPath& path = m_path[index];
|
||||
|
|
|
@ -27,7 +27,9 @@ class GSJobQueue final
|
|||
{
|
||||
private:
|
||||
std::thread m_thread;
|
||||
std::function<void()> m_startup;
|
||||
std::function<void(T&)> m_func;
|
||||
std::function<void()> m_shutdown;
|
||||
bool m_exit;
|
||||
ringbuffer_base<T, CAPACITY> m_queue;
|
||||
|
||||
|
@ -38,6 +40,9 @@ private:
|
|||
|
||||
void ThreadProc()
|
||||
{
|
||||
if (m_startup)
|
||||
m_startup();
|
||||
|
||||
std::unique_lock<std::mutex> l(m_lock);
|
||||
|
||||
while (true)
|
||||
|
@ -74,11 +79,16 @@ private:
|
|||
|
||||
l.lock();
|
||||
}
|
||||
|
||||
if (m_shutdown)
|
||||
m_shutdown();
|
||||
}
|
||||
|
||||
public:
|
||||
GSJobQueue(std::function<void(T&)> func)
|
||||
: m_func(func)
|
||||
GSJobQueue(std::function<void()> startup, std::function<void(T&)> func, std::function<void()> shutdown)
|
||||
: m_startup(std::move(startup))
|
||||
, m_func(std::move(func))
|
||||
, m_shutdown(std::move(shutdown))
|
||||
, m_exit(false)
|
||||
{
|
||||
m_thread = std::thread(&GSJobQueue::ThreadProc, this);
|
||||
|
|
|
@ -416,8 +416,6 @@ static GSVector4 CalculateDrawRect(s32 window_width, s32 window_height, s32 text
|
|||
|
||||
void GSRenderer::VSync(u32 field, bool registers_written)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(&g_perfmon);
|
||||
|
||||
Flush();
|
||||
|
||||
if (s_dump && s_n >= s_saven)
|
||||
|
|
|
@ -18,6 +18,11 @@
|
|||
#include "PrecompiledHeader.h"
|
||||
#include "GSRasterizer.h"
|
||||
#include "GS/GSExtra.h"
|
||||
#include "PerformanceMetrics.h"
|
||||
#include "common/StringUtil.h"
|
||||
#include "common/PersistentThread.h"
|
||||
|
||||
#define ENABLE_DRAW_STATS 0
|
||||
|
||||
int GSRasterizerData::s_counter = 0;
|
||||
|
||||
|
@ -128,8 +133,6 @@ int GSRasterizer::GetPixels(bool reset)
|
|||
|
||||
void GSRasterizer::Draw(GSRasterizerData* data)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
|
||||
|
||||
if (data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0)
|
||||
return;
|
||||
|
||||
|
@ -137,7 +140,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
m_pixels.total = 0;
|
||||
m_primcount = 0;
|
||||
|
||||
data->start = __rdtsc();
|
||||
if constexpr (ENABLE_DRAW_STATS)
|
||||
data->start = __rdtsc();
|
||||
|
||||
m_ds->BeginDraw(data);
|
||||
|
||||
|
@ -244,11 +248,10 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
|
||||
data->pixels = m_pixels.actual;
|
||||
|
||||
u64 ticks = __rdtsc() - data->start;
|
||||
|
||||
m_pixels.sum += m_pixels.actual;
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total, m_primcount);
|
||||
if constexpr (ENABLE_DRAW_STATS)
|
||||
m_ds->EndDraw(data->frame, __rdtsc() - data->start, m_pixels.actual, m_pixels.total, m_primcount);
|
||||
}
|
||||
|
||||
template <bool scissor_test>
|
||||
|
@ -1190,13 +1193,27 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
|||
{
|
||||
m_scanline[i] = static_cast<u8>(i % threads);
|
||||
}
|
||||
|
||||
PerformanceMetrics::SetGSSWThreadCount(threads);
|
||||
}
|
||||
|
||||
/// Destroys the rasterizer list: stops the workers, drops the per-thread
/// performance slots, and frees the scanline table.
GSRasterizerList::~GSRasterizerList()
{
	// Tear the workers down before emptying the metrics table. Each worker's
	// shutdown callback calls OnWorkerShutdown(), which writes to its slot via
	// PerformanceMetrics::SetGSSWThreadTimer(). Members are destroyed only after
	// this body has run, so without this the callbacks would index a table that
	// has already been resized to zero.
	// NOTE(review): assumes destroying a GSWorker stops and joins its thread --
	// confirm against GSJobQueue's destructor.
	m_workers.clear();

	PerformanceMetrics::SetGSSWThreadCount(0);
	_aligned_free(m_scanline);
}
|
||||
|
||||
void GSRasterizerList::OnWorkerStartup(int i)
|
||||
{
|
||||
Threading::SetNameOfCurrentThread(StringUtil::StdStringFromFormat("GS-SW-%d", i).c_str());
|
||||
PerformanceMetrics::SetGSSWThreadTimer(i, Common::ThreadCPUTimer::GetForCallingThread());
|
||||
}
|
||||
|
||||
void GSRasterizerList::OnWorkerShutdown(int i)
|
||||
{
|
||||
PerformanceMetrics::SetGSSWThreadTimer(i, Common::ThreadCPUTimer());
|
||||
}
|
||||
|
||||
void GSRasterizerList::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data)
|
||||
{
|
||||
GSVector4i r = data->bbox.rintersect(data->scissor);
|
||||
|
|
|
@ -197,6 +197,9 @@ protected:
|
|||
|
||||
GSRasterizerList(int threads, GSPerfMon* perfmon);
|
||||
|
||||
void OnWorkerStartup(int i);
|
||||
void OnWorkerShutdown(int i);
|
||||
|
||||
public:
|
||||
virtual ~GSRasterizerList();
|
||||
|
||||
|
@ -217,7 +220,9 @@ public:
|
|||
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(new DS(), i, threads, perfmon)));
|
||||
auto& r = *rl->m_r[i];
|
||||
rl->m_workers.push_back(std::unique_ptr<GSWorker>(new GSWorker(
|
||||
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(item.get()); })));
|
||||
[rl, i]() { rl->OnWorkerStartup(i); },
|
||||
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(item.get()); },
|
||||
[rl, i]() { rl->OnWorkerShutdown(i); })));
|
||||
}
|
||||
|
||||
return rl;
|
||||
|
|
|
@ -582,9 +582,7 @@ void GSRendererSW::Sync(int reason)
|
|||
{
|
||||
//printf("sync %d\n", reason);
|
||||
|
||||
GSPerfMonAutoTimer pmat(&g_perfmon, GSPerfMon::Sync);
|
||||
|
||||
u64 t = __rdtsc();
|
||||
u64 t = LOG ? __rdtsc() : 0;
|
||||
|
||||
m_rl->Sync();
|
||||
|
||||
|
@ -607,7 +605,7 @@ void GSRendererSW::Sync(int reason)
|
|||
}
|
||||
}
|
||||
|
||||
t = __rdtsc() - t;
|
||||
t = LOG ? (__rdtsc() - t) : 0;
|
||||
|
||||
int pixels = m_rl->GetPixels();
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
|
||||
#include "PerformanceMetrics.h"
|
||||
#include "System.h"
|
||||
|
@ -57,6 +58,14 @@ static float s_gs_thread_time = 0.0f;
|
|||
static float s_vu_thread_usage = 0.0f;
|
||||
static float s_vu_thread_time = 0.0f;
|
||||
|
||||
struct GSSWThreadStats
|
||||
{
|
||||
Common::ThreadCPUTimer timer;
|
||||
double usage = 0.0;
|
||||
double time = 0.0;
|
||||
};
|
||||
std::vector<GSSWThreadStats> s_gs_sw_threads;
|
||||
|
||||
void PerformanceMetrics::Clear()
|
||||
{
|
||||
Reset();
|
||||
|
@ -139,6 +148,12 @@ void PerformanceMetrics::Update(bool gs_register_write, bool fb_blit)
|
|||
s_cpu_thread_timer.GetUsageInMillisecondsAndReset(ticks_diff, &s_cpu_thread_time, &s_cpu_thread_usage);
|
||||
s_cpu_thread_time /= static_cast<double>(s_frames_since_last_update);
|
||||
|
||||
for (GSSWThreadStats& thread : s_gs_sw_threads)
|
||||
{
|
||||
thread.timer.GetUsageInMillisecondsAndReset(ticks_diff, &thread.time, &thread.usage);
|
||||
thread.time /= static_cast<double>(s_frames_since_last_update);
|
||||
}
|
||||
|
||||
const u64 gs_time = GetMTGS().GetCpuTime();
|
||||
const u64 vu_time = THREAD_VU1 ? vu1Thread.GetCpuTime() : 0;
|
||||
const u64 ticks = GetCPUTicks();
|
||||
|
@ -171,6 +186,17 @@ void PerformanceMetrics::SetCPUThreadTimer(Common::ThreadCPUTimer timer)
|
|||
s_cpu_thread_timer = std::move(timer);
|
||||
}
|
||||
|
||||
/// Discards all existing GS software-thread slots and replaces them with
/// `count` default-constructed ones.
void PerformanceMetrics::SetGSSWThreadCount(u32 count)
{
	// Build the fresh table in one step; old entries are dropped on assignment.
	s_gs_sw_threads = std::vector<GSSWThreadStats>(count);
}
|
||||
|
||||
/// Installs `timer` as the CPU timer for GS software-thread slot `index`.
/// Calls with an index outside the current slot table are ignored.
void PerformanceMetrics::SetGSSWThreadTimer(u32 index, Common::ThreadCPUTimer timer)
{
	// The table can be resized to zero by SetGSSWThreadCount() while worker
	// shutdown callbacks are still being delivered; std::vector::operator[]
	// does not bounds-check, so guard against writing past the end.
	if (index >= s_gs_sw_threads.size())
		return;

	s_gs_sw_threads[index].timer = std::move(timer);
}
|
||||
|
||||
void PerformanceMetrics::SetVerticalFrequency(float rate)
|
||||
{
|
||||
s_vertical_frequency = rate;
|
||||
|
@ -245,3 +271,18 @@ float PerformanceMetrics::GetVUThreadAverageTime()
|
|||
{
|
||||
return s_vu_thread_time;
|
||||
}
|
||||
|
||||
/// Returns the number of GS software-thread slots currently registered.
u32 PerformanceMetrics::GetGSSWThreadCount()
{
	const auto slot_count = s_gs_sw_threads.size();
	return static_cast<u32>(slot_count);
}
|
||||
|
||||
/// Returns the usage figure last computed for GS software-thread slot `index`,
/// or 0.0 if `index` is outside the current slot table.
double PerformanceMetrics::GetGSSWThreadUsage(u32 index)
{
	// The slot table can shrink via SetGSSWThreadCount() between a caller
	// reading the count and querying a slot; avoid an unchecked operator[].
	if (index >= s_gs_sw_threads.size())
		return 0.0;

	return s_gs_sw_threads[index].usage;
}
|
||||
|
||||
/// Returns the per-frame time last computed for GS software-thread slot
/// `index`, or 0.0 if `index` is outside the current slot table.
double PerformanceMetrics::GetGSSWThreadAverageTime(u32 index)
{
	// The slot table can shrink via SetGSSWThreadCount() between a caller
	// reading the count and querying a slot; avoid an unchecked operator[].
	if (index >= s_gs_sw_threads.size())
		return 0.0;

	return s_gs_sw_threads[index].time;
}
|
||||
|
|
|
@ -32,6 +32,10 @@ namespace PerformanceMetrics
|
|||
/// Sets the EE thread for CPU usage calculations.
|
||||
void SetCPUThreadTimer(Common::ThreadCPUTimer timer);
|
||||
|
||||
/// Sets timers for GS software threads.
|
||||
void SetGSSWThreadCount(u32 count);
|
||||
void SetGSSWThreadTimer(u32 index, Common::ThreadCPUTimer timer);
|
||||
|
||||
/// Sets the vertical frequency, used in speed calculations.
|
||||
void SetVerticalFrequency(float rate);
|
||||
|
||||
|
@ -52,4 +56,8 @@ namespace PerformanceMetrics
|
|||
float GetGSThreadAverageTime();
|
||||
float GetVUThreadUsage();
|
||||
float GetVUThreadAverageTime();
|
||||
|
||||
u32 GetGSSWThreadCount();
|
||||
double GetGSSWThreadUsage(u32 index);
|
||||
double GetGSSWThreadAverageTime(u32 index);
|
||||
} // namespace PerformanceMetrics
|
Loading…
Reference in New Issue