mirror of https://github.com/RPCS3/rpcs3.git
Make CPU Profiler able to print stats which sum up the records of all SPU threads
Hitherto the statistics have been exclusively thread-specific.

Other improvements:
* Fixed container management so that a new element can no longer collide with an older element of the record.
* Added the thread name to the thread-specific information printing.
* Fixed the condition for aborting SPU block statistics collection; it now matches the SPU LLVM Profiler's.
* Fixed a possible division by 0 by checking `samples`.
This commit is contained in:
parent
f7d08d3371
commit
1cab99b3ca
|
@ -76,41 +76,28 @@ struct cpu_prof
|
||||||
|
|
||||||
struct sample_info
|
struct sample_info
|
||||||
{
|
{
|
||||||
// Pointer to the thread
|
|
||||||
std::shared_ptr<cpu_thread> ptr;
|
|
||||||
|
|
||||||
// Block occurrences: name -> sample_count
|
// Block occurrences: name -> sample_count
|
||||||
std::unordered_map<u64, u64, value_hash<u64>> freq;
|
std::unordered_map<u64, u64, value_hash<u64>> freq;
|
||||||
|
|
||||||
// Total number of samples
|
// Total number of samples
|
||||||
u64 samples = 0, idle = 0;
|
u64 samples = 0, idle = 0;
|
||||||
|
|
||||||
sample_info(const std::shared_ptr<cpu_thread>& ptr)
|
// Avoid printing replicas
|
||||||
: ptr(ptr)
|
bool printed = false;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void reset()
|
void reset()
|
||||||
{
|
{
|
||||||
freq.clear();
|
freq.clear();
|
||||||
samples = 0;
|
samples = 0;
|
||||||
idle = 0;
|
idle = 0;
|
||||||
|
printed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print info
|
static std::string format(const std::multimap<u64, u64, std::greater<u64>>& chart, u64 samples, u64 idle, bool extended_print = false)
|
||||||
void print(u32 id) const
|
|
||||||
{
|
{
|
||||||
// Make reversed map: sample_count -> name
|
|
||||||
std::multimap<u64, u64, std::greater<u64>> chart;
|
|
||||||
|
|
||||||
for (auto& [name, count] : freq)
|
|
||||||
{
|
|
||||||
chart.emplace(count, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print results
|
// Print results
|
||||||
std::string results;
|
std::string results;
|
||||||
results.reserve(5100);
|
results.reserve(extended_print ? 10100 : 5100);
|
||||||
|
|
||||||
// Fraction of non-idle samples
|
// Fraction of non-idle samples
|
||||||
const f64 busy = 1. * (samples - idle) / samples;
|
const f64 busy = 1. * (samples - idle) / samples;
|
||||||
|
@ -126,20 +113,77 @@ struct cpu_prof
|
||||||
// Print chunk address from lowest 16 bits
|
// Print chunk address from lowest 16 bits
|
||||||
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
fmt::append(results, "...chunk-0x%05x]: %.4f%% (%u)", (name & 0xffff) * 4, _frac * 100., count);
|
||||||
|
|
||||||
if (results.size() >= 5000)
|
if (results.size() >= (extended_print ? 10000 : 5000))
|
||||||
{
|
{
|
||||||
// Stop printing after reaching some arbitrary limit in characters
|
// Stop printing after reaching some arbitrary limit in characters
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
profiler.notice("Thread [0x%08x]: %u samples (%.4f%% idle):%s", id, samples, 100. * idle / samples, results);
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print info
|
||||||
|
void print(const std::shared_ptr<cpu_thread>& ptr)
|
||||||
|
{
|
||||||
|
if (printed || samples == idle)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make reversed map: sample_count -> name
|
||||||
|
std::multimap<u64, u64, std::greater<u64>> chart;
|
||||||
|
|
||||||
|
for (auto& [name, count] : freq)
|
||||||
|
{
|
||||||
|
chart.emplace(count, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print results
|
||||||
|
const std::string results = format(chart, samples, idle);
|
||||||
|
profiler.notice("Thread \"%s\" [0x%08x]: %u samples (%.4f%% idle):%s", ptr->get_name(), ptr->id, samples, 100. * idle / samples, results);
|
||||||
|
|
||||||
|
printed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_all(const std::unordered_map<std::shared_ptr<cpu_thread>, sample_info>& threads)
|
||||||
|
{
|
||||||
|
std::multimap<u64, u64, std::greater<u64>> chart;
|
||||||
|
|
||||||
|
std::unordered_map<u64, u64, value_hash<u64>> freq;
|
||||||
|
|
||||||
|
u64 samples = 0, idle = 0;
|
||||||
|
|
||||||
|
for (auto& [_, info] : threads)
|
||||||
|
{
|
||||||
|
// This function collects thread information regardless of 'printed' member state
|
||||||
|
for (auto& [name, count] : info.freq)
|
||||||
|
{
|
||||||
|
freq[name] += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
samples += info.samples;
|
||||||
|
idle += info.idle;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& [name, count] : freq)
|
||||||
|
{
|
||||||
|
chart.emplace(count, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (samples == idle)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string results = format(chart, samples, idle, true);
|
||||||
|
profiler.notice("All Threads: %u samples (%.4f%% idle):%s", samples, 100. * idle / samples, results);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void operator()()
|
void operator()()
|
||||||
{
|
{
|
||||||
std::unordered_map<u32, sample_info, value_hash<u64>> threads;
|
std::unordered_map<std::shared_ptr<cpu_thread>, sample_info> threads;
|
||||||
|
|
||||||
while (thread_ctrl::state() != thread_state::aborting)
|
while (thread_ctrl::state() != thread_state::aborting)
|
||||||
{
|
{
|
||||||
|
@ -173,14 +217,13 @@ struct cpu_prof
|
||||||
|
|
||||||
if (ptr && cpu_flag::exit - ptr->state)
|
if (ptr && cpu_flag::exit - ptr->state)
|
||||||
{
|
{
|
||||||
auto [found, add] = threads.try_emplace(id, ptr);
|
auto [found, add] = threads.try_emplace(std::move(ptr));
|
||||||
|
|
||||||
if (!add)
|
if (!add)
|
||||||
{
|
{
|
||||||
// Overwritten: print previous data
|
// Overwritten (impossible?): print previous data
|
||||||
found->second.print(id);
|
found->second.print(found->first);
|
||||||
found->second.reset();
|
found->second.reset();
|
||||||
found->second.ptr = std::move(ptr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -193,17 +236,17 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sample active threads
|
// Sample active threads
|
||||||
for (auto& [id, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
if (cpu_flag::exit - info.ptr->state)
|
if (cpu_flag::exit - ptr->state)
|
||||||
{
|
{
|
||||||
// Get short function hash
|
// Get short function hash
|
||||||
const u64 name = atomic_storage<u64>::load(info.ptr->block_hash);
|
const u64 name = atomic_storage<u64>::load(ptr->block_hash);
|
||||||
|
|
||||||
// Append occurrence
|
// Append occurrence
|
||||||
info.samples++;
|
info.samples++;
|
||||||
|
|
||||||
if (!(info.ptr->state & (cpu_flag::wait + cpu_flag::stop + cpu_flag::dbg_global_pause)))
|
if (auto state = +ptr->state; !::is_paused(state) && !::is_stopped(state) && cpu_flag::wait - state)
|
||||||
{
|
{
|
||||||
info.freq[name]++;
|
info.freq[name]++;
|
||||||
|
|
||||||
|
@ -216,15 +259,10 @@ struct cpu_prof
|
||||||
info.idle++;
|
info.idle++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Cleanup and print results for deleted threads
|
|
||||||
for (auto it = threads.begin(), end = threads.end(); it != end;)
|
|
||||||
{
|
|
||||||
if (cpu_flag::exit & it->second.ptr->state)
|
|
||||||
it->second.print(it->first), it = threads.erase(it);
|
|
||||||
else
|
else
|
||||||
it++;
|
{
|
||||||
|
info.print(ptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flush)
|
if (flush)
|
||||||
|
@ -232,10 +270,9 @@ struct cpu_prof
|
||||||
profiler.success("Flushing profiling results...");
|
profiler.success("Flushing profiling results...");
|
||||||
|
|
||||||
// Print all results and cleanup
|
// Print all results and cleanup
|
||||||
for (auto& [id, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
info.print(id);
|
info.print(ptr);
|
||||||
info.reset();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,10 +281,12 @@ struct cpu_prof
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print all remaining results
|
// Print all remaining results
|
||||||
for (auto& [id, info] : threads)
|
for (auto& [ptr, info] : threads)
|
||||||
{
|
{
|
||||||
info.print(id);
|
info.print(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sample_info::print_all(threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr auto thread_name = "CPU Profiler"sv;
|
static constexpr auto thread_name = "CPU Profiler"sv;
|
||||||
|
|
Loading…
Reference in New Issue