Make CPU Profiler able to print stats which sum up the records of all SPU threads

Hitherto the statistics have been exclusively thread-specific.

Other improvements:
* Fixed container management so that a new element can no longer collide with an older element of the record.
* Added thread name to thread-specific information printing.
* Fixed the condition for aborting SPU block statistics collection; it now matches the SPU LLVM Profiler's.
* Fixed a possible division by 0 by checking `samples`.
This commit is contained in:
Eladash 2022-05-03 18:37:48 +03:00 committed by Ivan
parent f7d08d3371
commit 1cab99b3ca
1 changed files with 81 additions and 42 deletions

View File

@ -76,41 +76,28 @@ struct cpu_prof
struct sample_info struct sample_info
{ {
// Pointer to the thread
std::shared_ptr<cpu_thread> ptr;
// Block occurences: name -> sample_count // Block occurences: name -> sample_count
std::unordered_map<u64, u64, value_hash<u64>> freq; std::unordered_map<u64, u64, value_hash<u64>> freq;
// Total number of samples // Total number of samples
u64 samples = 0, idle = 0; u64 samples = 0, idle = 0;
sample_info(const std::shared_ptr<cpu_thread>& ptr) // Avoid printing replicas
: ptr(ptr) bool printed = false;
{
}
void reset() void reset()
{ {
freq.clear(); freq.clear();
samples = 0; samples = 0;
idle = 0; idle = 0;
printed = false;
} }
// Print info static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
void print(u32 id) const
{ {
// Make reversed map: sample_count -> name
std::multimap<u64, u64, std::greater<u64>> chart;
for (auto& [name, count] : freq)
{
chart.emplace(count, name);
}
// Print results // Print results
std::string results; std::string results;
results.reserve(5100); results.reserve(extended_print ? 10100 : 5100);
// Fraction of non-idle samples // Fraction of non-idle samples
const f64 busy = 1. * (samples - idle) / samples; const f64 busy = 1. * (samples - idle) / samples;
@ -126,20 +113,77 @@ struct cpu_prof
// Print chunk address from lowest 16 bits // Print chunk address from lowest 16 bits
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count); fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
if (results.size() >= 5000) if (results.size() >= (extended_print ? 10000 : 5000))
{ {
// Stop printing after reaching some arbitrary limit in characters // Stop printing after reaching some arbitrary limit in characters
break; break;
} }
} }
profiler.notice("Thread [0x%08x]: %u samples (%.4f%% idle):%s", id, samples, 100. * idle / samples, results); return results;
}
// Print info
void print(const std::shared_ptr<cpu_thread>& ptr)
{
if (printed || samples == idle)
{
return;
}
// Make reversed map: sample_count -> name
std::multimap<u64, u64, std::greater<u64>> chart;
for (auto& [name, count] : freq)
{
chart.emplace(count, name);
}
// Print results
const std::string results = format(chart, samples, idle);
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle):%s", ptr->get_name(), ptr->id, samples, 100. * idle / samples, results);
printed = true;
}
static void print_all(const std::unordered_map<std::shared_ptr<cpu_thread>, sample_info>& threads)
{
std::multimap<u64, u64, std::greater<u64>> chart;
std::unordered_map<u64, u64, value_hash<u64>> freq;
u64 samples = 0, idle = 0;
for (auto& [_, info] : threads)
{
// This function collects thread information regardless of 'printed' member state
for (auto& [name, count] : info.freq)
{
freq[name] += count;
}
samples += info.samples;
idle += info.idle;
}
for (auto& [name, count] : freq)
{
chart.emplace(count, name);
}
if (samples == idle)
{
return;
}
const std::string results = format(chart, samples, idle, true);
profiler.notice("All Threads: %u samples (%.4f%% idle):%s", samples, 100. * idle / samples, results);
} }
}; };
void operator()() void operator()()
{ {
std::unordered_map<u32, sample_info, value_hash<u64>> threads; std::unordered_map<std::shared_ptr<cpu_thread>, sample_info> threads;
while (thread_ctrl::state() != thread_state::aborting) while (thread_ctrl::state() != thread_state::aborting)
{ {
@ -173,14 +217,13 @@ struct cpu_prof
if (ptr && cpu_flag::exit - ptr->state) if (ptr && cpu_flag::exit - ptr->state)
{ {
auto [found, add] = threads.try_emplace(id, ptr); auto [found, add] = threads.try_emplace(std::move(ptr));
if (!add) if (!add)
{ {
// Overwritten: print previous data // Overwritten (impossible?): print previous data
found->second.print(id); found->second.print(found->first);
found->second.reset(); found->second.reset();
found->second.ptr = std::move(ptr);
} }
} }
} }
@ -193,17 +236,17 @@ struct cpu_prof
} }
// Sample active threads // Sample active threads
for (auto& [id, info] : threads) for (auto& [ptr, info] : threads)
{ {
if (cpu_flag::exit - info.ptr->state) if (cpu_flag::exit - ptr->state)
{ {
// Get short function hash // Get short function hash
const u64 name = atomic_storage<u64>::load(info.ptr->block_hash); const u64 name = atomic_storage<u64>::load(ptr->block_hash);
// Append occurrence // Append occurrence
info.samples++; info.samples++;
if (!(info.ptr->state & (cpu_flag::wait + cpu_flag::stop + cpu_flag::dbg_global_pause))) if (auto state = +ptr->state; !::is_paused(state) && !::is_stopped(state) && cpu_flag::wait - state)
{ {
info.freq[name]++; info.freq[name]++;
@ -216,15 +259,10 @@ struct cpu_prof
info.idle++; info.idle++;
} }
} }
}
// Cleanup and print results for deleted threads
for (auto it = threads.begin(), end = threads.end(); it != end;)
{
if (cpu_flag::exit & it->second.ptr->state)
it->second.print(it->first), it = threads.erase(it);
else else
it++; {
info.print(ptr);
}
} }
if (flush) if (flush)
@ -232,10 +270,9 @@ struct cpu_prof
profiler.success("Flushing profiling results..."); profiler.success("Flushing profiling results...");
// Print all results and cleanup // Print all results and cleanup
for (auto& [id, info] : threads) for (auto& [ptr, info] : threads)
{ {
info.print(id); info.print(ptr);
info.reset();
} }
} }
@ -244,10 +281,12 @@ struct cpu_prof
} }
// Print all remaining results // Print all remaining results
for (auto& [id, info] : threads) for (auto& [ptr, info] : threads)
{ {
info.print(id); info.print(ptr);
} }
sample_info::print_all(threads);
} }
static constexpr auto thread_name = "CPU Profiler"sv; static constexpr auto thread_name = "CPU Profiler"sv;