GS: Enhance stats

This commit is contained in:
Connor McLaughlin 2021-10-24 18:27:19 +10:00 committed by refractionpcsx2
parent 0c36647506
commit 226b0540c5
15 changed files with 106 additions and 91 deletions

View File

@ -35,6 +35,7 @@
#include "pcsx2/Config.h"
#include "pcsx2/Host.h"
#include "pcsx2/HostDisplay.h"
#include "pcsx2/GS.h"
#ifdef PCSX2_CORE
#include "pcsx2/HostSettings.h"
#endif
@ -629,26 +630,27 @@ void GSgetInternalResolution(int* width, int* height)
void GSgetStats(std::string& info)
{
GSPerfMon& pm = s_gs->m_perfmon;
GSPerfMon& pm = g_perfmon;
const char* api_name = HostDisplay::RenderAPIToString(s_render_api);
if (GSConfig.Renderer == GSRendererType::SW)
{
int sum = 0;
for (int i = 0; i < 16; i++)
sum += pm.CPU(GSPerfMon::WorkerDraw0 + i);
float sum = 0.0f;
for (int i = GSPerfMon::WorkerDraw0; i < GSPerfMon::TimerLast; i++)
sum += pm.GetTimer(static_cast<GSPerfMon::timer_t>(i));
const double fps = 1000.0f / pm.Get(GSPerfMon::Frame);
const double fps = GetVerticalFrequency();
const double fillrate = pm.Get(GSPerfMon::Fillrate);
info = format("%d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps | %d%% WCPU",
info = format("%s SW | %d S | %d P | %d D | %.2f U | %.2f D | %.2f mpps | %d%% WCPU",
api_name,
(int)pm.Get(GSPerfMon::SyncPoint),
(int)pm.Get(GSPerfMon::Prim),
(int)pm.Get(GSPerfMon::Draw),
pm.Get(GSPerfMon::Swizzle) / 1024,
pm.Get(GSPerfMon::Unswizzle) / 1024,
fps * fillrate / (1024 * 1024),
sum);
static_cast<int>(std::lround(sum)));
}
else if (GSConfig.Renderer == GSRendererType::Null)
{
@ -656,12 +658,14 @@ void GSgetStats(std::string& info)
}
else
{
info = format("%d S | %d P | %d D | %.2f U | %.2f D",
(int)pm.Get(GSPerfMon::SyncPoint),
info = format("%s HW | %d P | %d D | %d DC | %d RB | %d TC | %d TU",
api_name,
(int)pm.Get(GSPerfMon::Prim),
(int)pm.Get(GSPerfMon::Draw),
pm.Get(GSPerfMon::Swizzle) / 1024,
pm.Get(GSPerfMon::Unswizzle) / 1024);
(int)std::ceil(pm.Get(GSPerfMon::DrawCalls)),
(int)std::ceil(pm.Get(GSPerfMon::Readbacks)),
(int)std::ceil(pm.Get(GSPerfMon::TextureCopies)),
(int)std::ceil(pm.Get(GSPerfMon::TextureUploads)));
}
}

View File

@ -15,6 +15,9 @@
#include "PrecompiledHeader.h"
#include "GSPerfMon.h"
#include "GS.h"
GSPerfMon g_perfmon;
GSPerfMon::GSPerfMon()
: m_frame(0)
@ -23,41 +26,15 @@ GSPerfMon::GSPerfMon()
{
memset(m_counters, 0, sizeof(m_counters));
memset(m_stats, 0, sizeof(m_stats));
memset(m_timer_stats, 0, sizeof(m_timer_stats));
memset(m_total, 0, sizeof(m_total));
memset(m_begin, 0, sizeof(m_begin));
}
void GSPerfMon::Put(counter_t c, double val)
void GSPerfMon::EndFrame()
{
#ifndef DISABLE_PERF_MON
if (c == Frame)
{
#if defined(__unix__) || defined(__APPLE__)
struct timespec ts;
# ifdef CLOCK_MONOTONIC_RAW
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
# else
clock_gettime(CLOCK_MONOTONIC, &ts);
# endif
u64 now = (u64)ts.tv_sec * (u64)1e6 + (u64)ts.tv_nsec / (u64)1e3;
#else
clock_t now = clock();
#endif
if (m_lastframe != 0)
{
m_counters[c] += (now - m_lastframe) * 1000 / CLOCKS_PER_SEC;
}
m_lastframe = now;
m_frame++;
m_count++;
}
else
{
m_counters[c] += val;
}
#endif
}
void GSPerfMon::Update()
@ -71,6 +48,30 @@ void GSPerfMon::Update()
}
m_count = 0;
// Update CPU usage for SW renderer.
if (GSConfig.Renderer == GSRendererType::SW)
{
const u64 current = __rdtsc();
for (size_t i = WorkerDraw0; i < TimerLast; i++)
{
if (m_begin[i] == 0)
{
m_timer_stats[i] = 0.0f;
continue;
}
m_timer_stats[i] =
static_cast<float>(static_cast<double>(m_total[i]) / static_cast<double>(current - m_begin[i])
* 100.0);
m_begin[i] = 0;
m_start[i] = 0;
m_total[i] = 0;
}
}
}
memset(m_counters, 0, sizeof(m_counters));
@ -99,17 +100,3 @@ void GSPerfMon::Stop(int timer)
}
#endif
}
// Returns, as an integer percentage, m_total[timer] relative to the number of
// TSC ticks elapsed between m_begin[timer] and now (read via __rdtsc()).
//
// timer - index into the m_begin / m_start / m_total arrays.
// reset - when true, the three per-timer entries are zeroed after the
//         percentage has been computed.
//
// NOTE(review): there is no guard for a zero denominator, i.e. when __rdtsc()
// returns exactly m_begin[timer] - confirm whether callers can hit that.
int GSPerfMon::CPU(int timer, bool reset)
{
// The quotient is evaluated before the cast to int, so any fractional part
// is dropped by the division itself.
int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer]));
if (reset)
{
// Clear this timer's state once the value has been read.
m_begin[timer] = 0;
m_start[timer] = 0;
m_total[timer] = 0;
}
return percent;
}

View File

@ -29,20 +29,26 @@ public:
// Identifiers for the statistics counters. Each value is used as an index into
// the m_counters / m_stats arrays, which are sized by CounterLast, so the order
// and count of the enumerators before CounterLast determine the array layout.
enum counter_t
{
Frame,
Prim, // GSState::FlushPrim adds m_index.tail / vertex-count-per-primitive
Draw, // GSState::FlushPrim adds 1 per flush
DrawCalls, // the device backends add 1 around each draw they issue
Readbacks, // the device backends add 1 per DownloadTexture call
Swizzle, // GSState::FlushWrite / GSState::Write add the transfer length
Unswizzle, // the texture caches add the amount of block data updated
Fillrate, // GSRendererSW::Sync adds its pixel count
Quad,
SyncPoint,
CounterLast, // number of array slots; not a counter itself
// Reused counters for HW.
// These are declared after CounterLast and take the values of existing
// enumerators, so they index the same array slots as Fillrate and SyncPoint
// instead of adding new ones.
TextureCopies = Fillrate,
TextureUploads = SyncPoint,
};
protected:
double m_counters[CounterLast];
double m_stats[CounterLast];
float m_timer_stats[TimerLast];
u64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
u64 m_frame;
clock_t m_lastframe;
@ -55,14 +61,15 @@ public:
void SetFrame(u64 frame) { m_frame = frame; }
u64 GetFrame() { return m_frame; }
void EndFrame();
void Put(counter_t c, double val = 0);
void Put(counter_t c, double val = 0) { m_counters[c] += val; }
double Get(counter_t c) { return m_stats[c]; }
float GetTimer(timer_t t) { return m_timer_stats[t]; }
void Update();
void Start(int timer = Main);
void Stop(int timer = Main);
int CPU(int timer = Main, bool reset = true);
};
class GSPerfMonAutoTimer
@ -78,3 +85,5 @@ public:
}
~GSPerfMonAutoTimer() { m_pm->Stop(m_timer); }
};
extern GSPerfMon g_perfmon;

View File

@ -1397,7 +1397,7 @@ void GSState::FlushWrite()
m_tr.start += len;
m_perfmon.Put(GSPerfMon::Swizzle, len);
g_perfmon.Put(GSPerfMon::Swizzle, len);
}
void GSState::FlushPrim()
@ -1477,8 +1477,8 @@ void GSState::FlushPrim()
m_context->RestoreReg();
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
g_perfmon.Put(GSPerfMon::Draw, 1);
g_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
m_index.tail = 0;
@ -1557,7 +1557,7 @@ void GSState::Write(const u8* mem, int len)
m_tr.start = m_tr.end = m_tr.total;
m_perfmon.Put(GSPerfMon::Swizzle, len);
g_perfmon.Put(GSPerfMon::Swizzle, len);
}
else
{
@ -1791,7 +1791,7 @@ void GSState::SoftReset(u32 mask)
void GSState::ReadFIFO(u8* mem, int size)
{
GSPerfMonAutoTimer pmat(&m_perfmon);
GSPerfMonAutoTimer pmat(&g_perfmon);
Flush();
@ -1811,7 +1811,7 @@ template void GSState::Transfer<3>(const u8* mem, u32 size);
template <int index>
void GSState::Transfer(const u8* mem, u32 size)
{
GSPerfMonAutoTimer pmat(&m_perfmon);
GSPerfMonAutoTimer pmat(&g_perfmon);
const u8* start = mem;
@ -2208,7 +2208,7 @@ int GSState::Defrost(const freezeData* fd)
UpdateScissor();
m_perfmon.SetFrame(5000);
g_perfmon.SetFrame(5000);
return 0;
}

View File

@ -212,7 +212,6 @@ public:
GSLocalMemory m_mem;
GSDrawingEnvironment m_env;
GSDrawingContext* m_context;
GSPerfMon m_perfmon;
u32 m_crc;
CRC::Game m_game;
std::unique_ptr<GSDumpBase> m_dump;

View File

@ -384,15 +384,13 @@ static GSVector4 CalculateDrawRect(s32 window_width, s32 window_height, s32 text
void GSRenderer::VSync(u32 field)
{
GSPerfMonAutoTimer pmat(&m_perfmon);
m_perfmon.Put(GSPerfMon::Frame);
GSPerfMonAutoTimer pmat(&g_perfmon);
Flush();
if (s_dump && s_n >= s_saven)
{
m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame()));
m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, g_perfmon.GetFrame()));
}
g_gs_device->AgePool();
@ -409,8 +407,9 @@ void GSRenderer::VSync(u32 field)
return;
}
if ((m_perfmon.GetFrame() & 0x1f) == 0)
m_perfmon.Update();
g_perfmon.EndFrame();
if ((g_perfmon.GetFrame() & 0x1f) == 0)
g_perfmon.Update();
g_gs_device->ResetAPIState();
if (Host::BeginPresentFrame(false))

View File

@ -18,6 +18,7 @@
#include "GSDevice11.h"
#include "GS/Renderers/DX11/D3D.h"
#include "GS/GSExtra.h"
#include "GS/GSPerfMon.h"
#include "GS/GSUtil.h"
#include "Host.h"
#include "HostDisplay.h"
@ -370,6 +371,8 @@ void GSDevice11::RestoreAPIState()
void GSDevice11::BeforeDraw()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
// DX can't read from the FB
// So let's copy it and send that to the shader instead
@ -546,6 +549,8 @@ bool GSDevice11::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTextu
{
ASSERT(src);
ASSERT(!m_download_tex);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
m_download_tex.reset(static_cast<GSTexture11*>(CreateOffscreen(rect.width(), rect.height(), src->GetFormat())));
if (!m_download_tex)
return false;
@ -570,6 +575,8 @@ void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r)
return;
}
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U};
// DX api isn't happy if we pass a box for depth copy

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include "GSTexture11.h"
#include "GS/GSPng.h"
#include "GS/GSPerfMon.h"
GSTexture11::GSTexture11(wil::com_ptr_nothrow<ID3D11Texture2D> texture, GSTexture::Format format)
: m_texture(std::move(texture)), m_layer(0)
@ -56,6 +57,8 @@ bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int l
if (m_dev && m_texture)
{
g_perfmon.Put(GSPerfMon::TextureUploads, 1);
D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U};
UINT subresource = layer; // MipSlice + (ArraySlice * MipLevels).

View File

@ -331,7 +331,7 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
{
if (s_savef && s_n >= s_saven)
{
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, g_perfmon.GetFrame(), i, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
}
}
#endif
@ -354,7 +354,7 @@ GSTexture* GSRendererHW::GetFeedbackOutput()
#ifdef ENABLE_OGL_DEBUG
if (s_dump && s_savef && s_n >= s_saven)
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), 3, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, g_perfmon.GetFrame(), 3, (int)TEX0.TBP0, psm_str(TEX0.PSM)));
#endif
return t;
@ -1513,7 +1513,7 @@ void GSRendererHW::Draw()
if (s_dump)
{
const u64 frame = m_perfmon.GetFrame();
const u64 frame = g_perfmon.GetFrame();
std::string s;
@ -1680,7 +1680,7 @@ void GSRendererHW::Draw()
if (s_dump)
{
const u64 frame = m_perfmon.GetFrame();
const u64 frame = g_perfmon.GetFrame();
std::string s;

View File

@ -1956,7 +1956,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
if (blocks > 0)
{
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 2 : 0));
g_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 2 : 0));
Flush(m_write.count, layer);
}

View File

@ -680,6 +680,7 @@ void GSDeviceOGL::RestoreAPIState()
// Issues a non-indexed draw of m_vertex.count vertices starting at
// m_vertex.start, using the topology in m_draw_topology.
void GSDeviceOGL::DrawPrimitive()
{
// Count this as one draw call in the global performance monitor.
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
glDrawArrays(m_draw_topology, m_vertex.start, m_vertex.count);
}
@ -687,6 +688,7 @@ void GSDeviceOGL::DrawIndexedPrimitive()
{
if (!m_disable_hw_gl_draw)
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_INT,
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u32)), static_cast<GLint>(m_vertex.start));
}
@ -698,6 +700,7 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
if (!m_disable_hw_gl_draw)
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT,
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u32)),
static_cast<GLint>(m_vertex.start));
@ -1105,6 +1108,7 @@ std::string GSDeviceOGL::GetPSSource(PSSelector sel)
bool GSDeviceOGL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
{
ASSERT(src);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
GSTextureOGL* srcgl = static_cast<GSTextureOGL*>(src);
@ -1116,6 +1120,7 @@ bool GSDeviceOGL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSText
void GSDeviceOGL::BlitRect(GSTexture* sTex, const GSVector4i& r, const GSVector2i& dsize, bool at_origin, bool linear)
{
GL_PUSH(format("CopyRectConv from %d", static_cast<GSTextureOGL*>(sTex)->GetID()).c_str());
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
// NOTE: This previously used glCopyTextureSubImage2D(), but this appears to leak memory in
// the loading screens of Evolution Snowboarding in Intel/NVIDIA drivers.
@ -1153,6 +1158,7 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r
#endif
dTex->CommitRegion(GSVector2i(r.z, r.w));
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
ASSERT(GLExtension::Has("GL_ARB_copy_image") && glCopyImageSubData);
glCopyImageSubData(sid, GL_TEXTURE_2D,

View File

@ -17,6 +17,7 @@
#include <limits.h>
#include "GSTextureOGL.h"
#include "GLState.h"
#include "GS/GSPerfMon.h"
#include "GS/GSPng.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
@ -393,6 +394,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
#endif
GL_PUSH("Upload Texture %d", m_texture_id);
g_perfmon.Put(GSPerfMon::TextureUploads, 1);
// The easy solution without PBO
#if 0
@ -449,6 +451,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
if (m_type == Type::Texture || m_type == Type::RenderTarget)
{
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
g_perfmon.Put(GSPerfMon::TextureUploads, 1);
m_clean = false;

View File

@ -35,7 +35,7 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_texture, 0, sizeof(m_texture));
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &g_perfmon);
m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), 32);
@ -97,7 +97,7 @@ void GSRendererSW::VSync(u32 field)
if (0) if (LOG)
{
fprintf(s_fp, "%llu\n", m_perfmon.GetFrame());
fprintf(s_fp, "%llu\n", g_perfmon.GetFrame());
GSVector4i dr = GetDisplayRect();
GSVector4i fr = GetFrameRect();
@ -162,7 +162,7 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset)
{
if (s_savef && s_n >= s_saven)
{
m_texture[i]->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), psm_str(DISPFB.PSM)));
m_texture[i]->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, g_perfmon.GetFrame(), i, (int)DISPFB.Block(), psm_str(DISPFB.PSM)));
}
}
}
@ -362,7 +362,7 @@ void GSRendererSW::Draw()
sd->scissor = scissor;
sd->bbox = bbox;
sd->frame = m_perfmon.GetFrame();
sd->frame = g_perfmon.GetFrame();
if (!GetScanlineGlobalData(sd))
{
@ -434,7 +434,7 @@ void GSRendererSW::Draw()
{
Sync(2);
u64 frame = m_perfmon.GetFrame();
u64 frame = g_perfmon.GetFrame();
// Dump the texture in 32 bits format. It helps to debug texture shuffle effect
// It will breaks the few games that really uses 16 bits RT
bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS));
@ -581,7 +581,7 @@ void GSRendererSW::Sync(int reason)
{
//printf("sync %d\n", reason);
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
GSPerfMonAutoTimer pmat(&g_perfmon, GSPerfMon::Sync);
u64 t = __rdtsc();
@ -593,14 +593,14 @@ void GSRendererSW::Sync(int reason)
if (s_save)
{
s = format("%05d_f%lld_rt1_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), psm_str(m_context->FRAME.PSM));
s = format("%05d_f%lld_rt1_%05x_%s.bmp", s_n, g_perfmon.GetFrame(), m_context->FRAME.Block(), psm_str(m_context->FRAME.PSM));
m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
}
if (s_savez)
{
s = format("%05d_f%lld_zb1_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), psm_str(m_context->ZBUF.PSM));
s = format("%05d_f%lld_zb1_%05x_%s.bmp", s_n, g_perfmon.GetFrame(), m_context->ZBUF.Block(), psm_str(m_context->ZBUF.PSM));
m_mem.SaveBMP(m_dump_root + s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512);
}
@ -616,7 +616,7 @@ void GSRendererSW::Sync(int reason)
fflush(s_fp);
}
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
g_perfmon.Put(GSPerfMon::Fillrate, pixels);
}
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
@ -1541,7 +1541,7 @@ void GSRendererSW::SharedData::UpdateSource()
if (m_parent->s_dump)
{
u64 frame = m_parent->m_perfmon.GetFrame();
u64 frame = g_perfmon.GetFrame();
std::string s;

View File

@ -281,7 +281,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
if (blocks > 0)
{
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
g_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
}
return true;

View File

@ -39,6 +39,4 @@
//#define ENABLE_EXTRA_LOG // print extra log
#endif
#if (defined(__unix__) || defined(__APPLE__)) && !(defined(_DEBUG) || defined(_DEVEL))
#define DISABLE_PERF_MON // Burn cycle for nothing in release mode
#endif
//#define DISABLE_PERF_MON // Burn cycle for nothing in release mode