forked from ShuriZma/suyu
Merge pull request #538 from yuriks/perf-stat
Add profiling infrastructure and widget
This commit is contained in:
commit
93e32bce72
|
@ -13,6 +13,7 @@ set(SRCS
|
||||||
debugger/graphics_cmdlists.cpp
|
debugger/graphics_cmdlists.cpp
|
||||||
debugger/graphics_framebuffer.cpp
|
debugger/graphics_framebuffer.cpp
|
||||||
debugger/graphics_vertex_shader.cpp
|
debugger/graphics_vertex_shader.cpp
|
||||||
|
debugger/profiler.cpp
|
||||||
debugger/ramview.cpp
|
debugger/ramview.cpp
|
||||||
debugger/registers.cpp
|
debugger/registers.cpp
|
||||||
util/spinbox.cpp
|
util/spinbox.cpp
|
||||||
|
@ -35,6 +36,7 @@ set(HEADERS
|
||||||
debugger/graphics_cmdlists.h
|
debugger/graphics_cmdlists.h
|
||||||
debugger/graphics_framebuffer.h
|
debugger/graphics_framebuffer.h
|
||||||
debugger/graphics_vertex_shader.h
|
debugger/graphics_vertex_shader.h
|
||||||
|
debugger/profiler.h
|
||||||
debugger/ramview.h
|
debugger/ramview.h
|
||||||
debugger/registers.h
|
debugger/registers.h
|
||||||
util/spinbox.h
|
util/spinbox.h
|
||||||
|
@ -48,6 +50,7 @@ set(UIS
|
||||||
config/controller_config.ui
|
config/controller_config.ui
|
||||||
debugger/callstack.ui
|
debugger/callstack.ui
|
||||||
debugger/disassembler.ui
|
debugger/disassembler.ui
|
||||||
|
debugger/profiler.ui
|
||||||
debugger/registers.ui
|
debugger/registers.ui
|
||||||
hotkeys.ui
|
hotkeys.ui
|
||||||
main.ui
|
main.ui
|
||||||
|
|
|
@ -0,0 +1,138 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "profiler.h"
|
||||||
|
|
||||||
|
#include "common/profiler_reporting.h"
|
||||||
|
|
||||||
|
using namespace Common::Profiling;
|
||||||
|
|
||||||
|
/// Picks the value shown in one of the numeric columns for a single aggregated measurement.
/// Column 1 maps to the average, 2 to the minimum and 3 to the maximum, each converted to
/// fractional milliseconds. Any other column yields an invalid QVariant.
static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
{
    using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;

    const Duration* selected = nullptr;
    if (col == 1) {
        selected = &duration.avg;
    } else if (col == 2) {
        selected = &duration.min;
    } else if (col == 3) {
        selected = &duration.max;
    }

    if (selected == nullptr)
        return QVariant();

    const float milliseconds = std::chrono::duration_cast<FloatMs>(*selected).count();
    return milliseconds;
}
|
||||||
|
|
||||||
|
/// Looks up the registration record of the timing category with the given id.
/// Returns nullptr when no category with that id is registered.
static const TimingCategoryInfo* GetCategoryInfo(int id)
{
    const auto& registered = GetProfilingManager().GetTimingCategoriesInfo();

    // The id is compared as an unsigned value, so a negative id becomes a huge index and is
    // rejected by the same range check.
    const size_t position = static_cast<size_t>(id);
    return position < registered.size() ? &registered[position] : nullptr;
}
|
||||||
|
|
||||||
|
/// Builds the model and fills it with an initial snapshot of the profiling results.
ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
{
    // Fetch the snapshot first: updateProfilingInfo() replaces `results` as a whole, so the
    // resize has to happen afterwards to take effect.
    updateProfilingInfo();

    // Make sure there is one entry per registered timing category.
    results.time_per_category.resize(GetProfilingManager().GetTimingCategoriesInfo().size());
}
|
||||||
|
|
||||||
|
/// Provides the captions of the four columns. Only horizontal headers with the display role
/// are answered; everything else gets an invalid QVariant.
QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
{
    if (orientation != Qt::Horizontal || role != Qt::DisplayRole)
        return QVariant();

    if (section == 0)
        return tr("Category");
    if (section == 1)
        return tr("Avg");
    if (section == 2)
        return tr("Min");
    if (section == 3)
        return tr("Max");

    return QVariant();
}
|
||||||
|
|
||||||
|
/// Creates the index for the cell at (row, column). The parent argument is not consulted.
QModelIndex ProfilerModel::index(int row, int column, const QModelIndex& parent) const
{
    const QModelIndex cell = createIndex(row, column);
    return cell;
}
|
||||||
|
|
||||||
|
/// Always returns an invalid index: no item in this model has a parent.
QModelIndex ProfilerModel::parent(const QModelIndex& child) const
{
    const QModelIndex no_parent;
    return no_parent;
}
|
||||||
|
|
||||||
|
int ProfilerModel::columnCount(const QModelIndex& parent) const
|
||||||
|
{
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ProfilerModel::rowCount(const QModelIndex& parent) const
|
||||||
|
{
|
||||||
|
if (parent.isValid()) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return results.time_per_category.size() + 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Supplies the content of a cell for the display role. Row 0 is labelled "Frame", row 1
/// "Frame (with swapping)", and every following row belongs to the timing category whose id is
/// row - 2. Column 0 holds the label; the remaining columns are delegated to GetDataForColumn.
QVariant ProfilerModel::data(const QModelIndex& index, int role) const
{
    // No other role carries data in this model.
    if (role != Qt::DisplayRole)
        return QVariant();

    const int row = index.row();
    const int column = index.column();
    const bool wants_label = (column == 0);

    switch (row) {
    case 0:
        if (wants_label)
            return tr("Frame");
        return GetDataForColumn(column, results.frame_time);

    case 1:
        if (wants_label)
            return tr("Frame (with swapping)");
        return GetDataForColumn(column, results.interframe_time);

    default:
        break;
    }

    // Everything past the two fixed rows is a per-category row.
    const int category_row = row - 2;

    if (wants_label) {
        const TimingCategoryInfo* info = GetCategoryInfo(category_row);
        if (info == nullptr)
            return QVariant();
        return QString(info->name);
    }

    // Compared as unsigned, so an out-of-range (including negative) row yields no data.
    if (static_cast<size_t>(category_row) < results.time_per_category.size())
        return GetDataForColumn(column, results.time_per_category[category_row]);

    return QVariant();
}
|
||||||
|
|
||||||
|
void ProfilerModel::updateProfilingInfo()
|
||||||
|
{
|
||||||
|
results = GetTimingResultsAggregator()->GetAggregatedResults();
|
||||||
|
emit dataChanged(createIndex(0, 1), createIndex(rowCount() - 1, 3));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets up the dock widget: builds the designer UI, attaches a ProfilerModel to the tree view
/// and wires up the periodic refresh.
ProfilerWidget::ProfilerWidget(QWidget* parent) : QDockWidget(parent)
{
    ui.setupUi(this);

    // The model is created with this widget as its QObject parent.
    model = new ProfilerModel(this);
    ui.treeView->setModel(model);

    // Each timer tick pulls a new snapshot into the model...
    connect(&update_timer, SIGNAL(timeout()), model, SLOT(updateProfilingInfo()));
    // ...and the timer itself follows the visibility of the dock.
    connect(this, SIGNAL(visibilityChanged(bool)), this, SLOT(setProfilingInfoUpdateEnabled(bool)));
}
|
||||||
|
|
||||||
|
void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
|
||||||
|
{
|
||||||
|
if (enable) {
|
||||||
|
update_timer.start(100);
|
||||||
|
model->updateProfilingInfo();
|
||||||
|
} else {
|
||||||
|
update_timer.stop();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QAbstractItemModel>
|
||||||
|
#include <QDockWidget>
|
||||||
|
#include <QTimer>
|
||||||
|
#include "ui_profiler.h"
|
||||||
|
|
||||||
|
#include "common/profiler_reporting.h"
|
||||||
|
|
||||||
|
class ProfilerModel : public QAbstractItemModel
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
ProfilerModel(QObject* parent);
|
||||||
|
|
||||||
|
QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const;
|
||||||
|
QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override;
|
||||||
|
QModelIndex parent(const QModelIndex& child) const override;
|
||||||
|
int columnCount(const QModelIndex& parent = QModelIndex()) const override;
|
||||||
|
int rowCount(const QModelIndex& parent = QModelIndex()) const override;
|
||||||
|
QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
|
||||||
|
|
||||||
|
public slots:
|
||||||
|
void updateProfilingInfo();
|
||||||
|
|
||||||
|
private:
|
||||||
|
Common::Profiling::AggregatedFrameResult results;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ProfilerWidget : public QDockWidget
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
ProfilerWidget(QWidget* parent = 0);
|
||||||
|
|
||||||
|
private slots:
|
||||||
|
void setProfilingInfoUpdateEnabled(bool enable);
|
||||||
|
|
||||||
|
private:
|
||||||
|
Ui::Profiler ui;
|
||||||
|
ProfilerModel* model;
|
||||||
|
|
||||||
|
QTimer update_timer;
|
||||||
|
};
|
|
@ -0,0 +1,33 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ui version="4.0">
|
||||||
|
<class>Profiler</class>
|
||||||
|
<widget class="QDockWidget" name="Profiler">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>0</x>
|
||||||
|
<y>0</y>
|
||||||
|
<width>400</width>
|
||||||
|
<height>300</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="windowTitle">
|
||||||
|
<string>Profiler</string>
|
||||||
|
</property>
|
||||||
|
<widget class="QWidget" name="dockWidgetContents">
|
||||||
|
<layout class="QVBoxLayout" name="verticalLayout">
|
||||||
|
<item>
|
||||||
|
<widget class="QTreeView" name="treeView">
|
||||||
|
<property name="alternatingRowColors">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
<property name="uniformRowHeights">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
</widget>
|
||||||
|
<resources/>
|
||||||
|
<connections/>
|
||||||
|
</ui>
|
|
@ -35,6 +35,7 @@
|
||||||
#include "debugger/graphics_cmdlists.h"
|
#include "debugger/graphics_cmdlists.h"
|
||||||
#include "debugger/graphics_framebuffer.h"
|
#include "debugger/graphics_framebuffer.h"
|
||||||
#include "debugger/graphics_vertex_shader.h"
|
#include "debugger/graphics_vertex_shader.h"
|
||||||
|
#include "debugger/profiler.h"
|
||||||
|
|
||||||
#include "core/settings.h"
|
#include "core/settings.h"
|
||||||
#include "core/system.h"
|
#include "core/system.h"
|
||||||
|
@ -57,6 +58,10 @@ GMainWindow::GMainWindow()
|
||||||
render_window = new GRenderWindow;
|
render_window = new GRenderWindow;
|
||||||
render_window->hide();
|
render_window->hide();
|
||||||
|
|
||||||
|
profilerWidget = new ProfilerWidget(this);
|
||||||
|
addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
|
||||||
|
profilerWidget->hide();
|
||||||
|
|
||||||
disasmWidget = new DisassemblerWidget(this, render_window->GetEmuThread());
|
disasmWidget = new DisassemblerWidget(this, render_window->GetEmuThread());
|
||||||
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
|
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
|
||||||
disasmWidget->hide();
|
disasmWidget->hide();
|
||||||
|
@ -90,6 +95,7 @@ GMainWindow::GMainWindow()
|
||||||
graphicsVertexShaderWidget->hide();
|
graphicsVertexShaderWidget->hide();
|
||||||
|
|
||||||
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
|
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
|
||||||
|
debug_menu->addAction(profilerWidget->toggleViewAction());
|
||||||
debug_menu->addAction(disasmWidget->toggleViewAction());
|
debug_menu->addAction(disasmWidget->toggleViewAction());
|
||||||
debug_menu->addAction(registersWidget->toggleViewAction());
|
debug_menu->addAction(registersWidget->toggleViewAction());
|
||||||
debug_menu->addAction(callstackWidget->toggleViewAction());
|
debug_menu->addAction(callstackWidget->toggleViewAction());
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
class GImageInfo;
|
class GImageInfo;
|
||||||
class GRenderWindow;
|
class GRenderWindow;
|
||||||
|
class ProfilerWidget;
|
||||||
class DisassemblerWidget;
|
class DisassemblerWidget;
|
||||||
class RegistersWidget;
|
class RegistersWidget;
|
||||||
class CallstackWidget;
|
class CallstackWidget;
|
||||||
|
@ -54,6 +55,7 @@ private:
|
||||||
|
|
||||||
GRenderWindow* render_window;
|
GRenderWindow* render_window;
|
||||||
|
|
||||||
|
ProfilerWidget* profilerWidget;
|
||||||
DisassemblerWidget* disasmWidget;
|
DisassemblerWidget* disasmWidget;
|
||||||
RegistersWidget* registersWidget;
|
RegistersWidget* registersWidget;
|
||||||
CallstackWidget* callstackWidget;
|
CallstackWidget* callstackWidget;
|
||||||
|
|
|
@ -14,6 +14,7 @@ set(SRCS
|
||||||
mem_arena.cpp
|
mem_arena.cpp
|
||||||
memory_util.cpp
|
memory_util.cpp
|
||||||
misc.cpp
|
misc.cpp
|
||||||
|
profiler.cpp
|
||||||
scm_rev.cpp
|
scm_rev.cpp
|
||||||
string_util.cpp
|
string_util.cpp
|
||||||
symbols.cpp
|
symbols.cpp
|
||||||
|
@ -48,11 +49,14 @@ set(HEADERS
|
||||||
mem_arena.h
|
mem_arena.h
|
||||||
memory_util.h
|
memory_util.h
|
||||||
platform.h
|
platform.h
|
||||||
|
profiler.h
|
||||||
|
profiler_reporting.h
|
||||||
scm_rev.h
|
scm_rev.h
|
||||||
scope_exit.h
|
scope_exit.h
|
||||||
string_util.h
|
string_util.h
|
||||||
swap.h
|
swap.h
|
||||||
symbols.h
|
symbols.h
|
||||||
|
synchronized_wrapper.h
|
||||||
thread.h
|
thread.h
|
||||||
thread_queue_list.h
|
thread_queue_list.h
|
||||||
thunk.h
|
thunk.h
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/profiler.h"
|
||||||
|
#include "common/profiler_reporting.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
|
||||||
|
#define NOMINMAX
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#include <Windows.h> // For QueryPerformanceCounter/Frequency
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
namespace Profiling {
|
||||||
|
|
||||||
|
#if ENABLE_PROFILING
|
||||||
|
thread_local Timer* Timer::current_timer = nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
|
||||||
|
/// Returns the current QueryPerformanceCounter reading converted to microseconds.
QPCClock::time_point QPCClock::now() {
    static LARGE_INTEGER freq;
    // Use this dummy local static to ensure this gets initialized once.
    static BOOL dummy = QueryPerformanceFrequency(&freq);
    (void)dummy;

    LARGE_INTEGER ticks;
    QueryPerformanceCounter(&ticks);

    // Multiplying the raw tick count by 1'000'000 overflows 64 bits once the counter gets large
    // enough (with a 10 MHz counter that is after roughly ten days). Converting the whole
    // seconds and the sub-second remainder separately yields exactly the same quotient, while
    // the only product involving the remainder stays below freq * 1'000'000.
    const LONGLONG whole_seconds = ticks.QuadPart / freq.QuadPart;
    const LONGLONG leftover_ticks = ticks.QuadPart % freq.QuadPart;
    const LONGLONG microseconds =
        whole_seconds * std::micro::den + leftover_ticks * std::micro::den / freq.QuadPart;

    return time_point(duration(microseconds));
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Registers this category under `name` with the global ProfilingManager and, when a parent
/// is supplied, records the parent/child relationship. The accumulated time starts at zero.
TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
        : accumulated_duration(0) {

    ProfilingManager& registry = GetProfilingManager();
    category_id = registry.RegisterTimingCategory(this, name);

    if (parent == nullptr)
        return;

    registry.SetTimingCategoryParent(category_id, parent->category_id);
}
|
||||||
|
|
||||||
|
/// Seeds both frame timestamps with the current time, each read from the clock separately.
ProfilingManager::ProfilingManager()
        : last_frame_end(Clock::now()),
          this_frame_start(Clock::now()) {
}
|
||||||
|
|
||||||
|
/// Appends a new timing category, initially without a parent, and returns its id. The id is
/// the category's position in the list of registered categories.
unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
    const unsigned int new_id = static_cast<unsigned int>(timing_categories.size());

    const TimingCategoryInfo entry{category, name, TimingCategoryInfo::NO_PARENT};
    timing_categories.push_back(entry);

    return new_id;
}
|
||||||
|
|
||||||
|
void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
|
||||||
|
ASSERT(category < timing_categories.size());
|
||||||
|
ASSERT(parent < timing_categories.size());
|
||||||
|
|
||||||
|
timing_categories[category].parent = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfilingManager::BeginFrame() {
|
||||||
|
this_frame_start = Clock::now();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProfilingManager::FinishFrame() {
|
||||||
|
Clock::time_point now = Clock::now();
|
||||||
|
|
||||||
|
results.interframe_time = now - last_frame_end;
|
||||||
|
results.frame_time = now - this_frame_start;
|
||||||
|
|
||||||
|
results.time_per_category.resize(timing_categories.size());
|
||||||
|
for (size_t i = 0; i < timing_categories.size(); ++i) {
|
||||||
|
results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
last_frame_end = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates an aggregator that keeps up to `window_size` frames. The sample buffers are
/// pre-filled with zero durations and the window starts out empty.
TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
        : max_window_size(window_size), window_size(0), cursor(0) {
    // `cursor` must be initialised here: AddFrame() uses it as an index before anything else
    // assigns it, and it was previously left indeterminate.
    interframe_times.resize(max_window_size, Duration::zero());
    frame_times.resize(max_window_size, Duration::zero());
}
|
||||||
|
|
||||||
|
/// Empties the window: the next frame is written to slot 0 and no samples count as recorded.
/// The stored sample values themselves are left in place.
void TimingResultsAggregator::Clear() {
    cursor = 0;
    window_size = 0;
}
|
||||||
|
|
||||||
|
void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
|
||||||
|
size_t old_size = times_per_category.size();
|
||||||
|
if (n == old_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
times_per_category.resize(n);
|
||||||
|
|
||||||
|
for (size_t i = old_size; i < n; ++i) {
|
||||||
|
times_per_category[i].resize(max_window_size, Duration::zero());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
|
||||||
|
SetNumberOfCategories(frame_result.time_per_category.size());
|
||||||
|
|
||||||
|
interframe_times[cursor] = frame_result.interframe_time;
|
||||||
|
frame_times[cursor] = frame_result.frame_time;
|
||||||
|
for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
|
||||||
|
times_per_category[i][cursor] = frame_result.time_per_category[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
++cursor;
|
||||||
|
if (cursor == max_window_size)
|
||||||
|
cursor = 0;
|
||||||
|
if (window_size < max_window_size)
|
||||||
|
++window_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the average, minimum and maximum of the first `len` samples of `v`.
/// When `len` is 0 all three fields are zero.
/// NOTE(review): `len` is assumed not to exceed v.size(); the visible callers pass the current
/// window size for buffers sized to the maximum window.
static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
    AggregatedDuration result;
    result.avg = result.min = result.max = Duration::zero();
    if (len == 0)
        return result;

    result.min = result.max = v[0];

    // The loop has to start at index 0: the first sample only seeds min/max above, so skipping
    // it here would leave it out of the sum while still dividing by `len`.
    for (size_t i = 0; i < len; ++i) {
        const Duration value = v[i];
        result.avg += value;
        result.min = std::min(result.min, value);
        result.max = std::max(result.max, value);
    }
    result.avg /= len;

    return result;
}
|
||||||
|
|
||||||
|
/// Converts a profiler duration to fractional milliseconds as a float.
static float tof(Common::Profiling::Duration dur) {
    typedef std::chrono::duration<float, std::chrono::milliseconds::period> FloatMs;
    const FloatMs as_milliseconds = std::chrono::duration_cast<FloatMs>(dur);
    return as_milliseconds.count();
}
|
||||||
|
|
||||||
|
/// Summarises the samples currently in the window: avg/min/max of the frame and inter-frame
/// times and of every category, plus the frame rate derived from the average inter-frame time
/// (0 when that average is zero).
AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
    AggregatedFrameResult summary;

    summary.frame_time = AggregateField(frame_times, window_size);
    summary.interframe_time = AggregateField(interframe_times, window_size);

    // 1000 ms divided by the average milliseconds between frames; guarded against a zero
    // average.
    const bool has_interval = summary.interframe_time.avg != Duration::zero();
    summary.fps = has_interval ? 1000.0f / tof(summary.interframe_time.avg) : 0.0f;

    summary.time_per_category.reserve(times_per_category.size());
    for (const std::vector<Duration>& samples : times_per_category) {
        summary.time_per_category.push_back(AggregateField(samples, window_size));
    }

    return summary;
}
|
||||||
|
|
||||||
|
/// Gives access to the single, lazily created ProfilingManager.
ProfilingManager& GetProfilingManager() {
    // Function-local static: constructed the first time control passes through here.
    static ProfilingManager instance;
    return instance;
}
|
||||||
|
|
||||||
|
/// Returns a locking reference to the shared results aggregator, which is created on first use
/// with a window of 30 frames. The aggregator's mutex stays locked for as long as the returned
/// reference is alive.
SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
    static SynchronizedWrapper<TimingResultsAggregator> shared_aggregator(30);

    SynchronizedRef<TimingResultsAggregator> locked(shared_aggregator);
    return locked;
}
|
||||||
|
|
||||||
|
} // namespace Profiling
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,152 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/thread.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
namespace Profiling {
|
||||||
|
|
||||||
|
// If this is defined to 0, it turns all Timers into no-ops.
|
||||||
|
#ifndef ENABLE_PROFILING
|
||||||
|
#define ENABLE_PROFILING 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
|
||||||
|
// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
|
||||||
|
// precision. We manually implement a clock based on QPC to get good results.
|
||||||
|
|
||||||
|
/// Clock type with microsecond resolution whose now() is implemented on top of
/// QueryPerformanceCounter.
struct QPCClock {
    using duration = std::chrono::microseconds;
    using rep = duration::rep;
    using period = duration::period;
    using time_point = std::chrono::time_point<QPCClock>;

    static const bool is_steady = false;

    /// Returns the current time. Defined in profiler.cpp.
    static time_point now();
};
|
||||||
|
|
||||||
|
using Clock = QPCClock;
|
||||||
|
#else
|
||||||
|
using Clock = std::chrono::high_resolution_clock;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using Duration = Clock::duration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a timing category that measured time can be accounted towards. Should be declared as a
|
||||||
|
* global variable and passed to Timers.
|
||||||
|
*/
|
||||||
|
class TimingCategory final {
|
||||||
|
public:
|
||||||
|
TimingCategory(const char* name, TimingCategory* parent = nullptr);
|
||||||
|
|
||||||
|
unsigned int GetCategoryId() const {
|
||||||
|
return category_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds some time to this category. Can safely be called from multiple threads at the same time.
|
||||||
|
void AddTime(Duration amount) {
|
||||||
|
std::atomic_fetch_add_explicit(
|
||||||
|
&accumulated_duration, amount.count(),
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically retrieves the accumulated measured time for this category and resets the counter
|
||||||
|
* to zero. Can be safely called concurrently with AddTime.
|
||||||
|
*/
|
||||||
|
Duration GetAccumulatedTime() {
|
||||||
|
return Duration(std::atomic_exchange_explicit(
|
||||||
|
&accumulated_duration, (Duration::rep)0,
|
||||||
|
std::memory_order_relaxed));
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned int category_id;
|
||||||
|
std::atomic<Duration::rep> accumulated_duration;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
|
||||||
|
* TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
|
||||||
|
* appropriately paired.
|
||||||
|
*
|
||||||
|
* When a Timer is started, it automatically pauses a previously running timer on the same thread,
|
||||||
|
* which is resumed when it is stopped. As such, no special action needs to be taken to avoid
|
||||||
|
* double-accounting of time on two categories.
|
||||||
|
*/
|
||||||
|
class Timer {
|
||||||
|
public:
|
||||||
|
Timer(TimingCategory& category) : category(category) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void Start() {
|
||||||
|
#if ENABLE_PROFILING
|
||||||
|
ASSERT(!running);
|
||||||
|
previous_timer = current_timer;
|
||||||
|
current_timer = this;
|
||||||
|
if (previous_timer != nullptr)
|
||||||
|
previous_timer->StopTiming();
|
||||||
|
|
||||||
|
StartTiming();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stop() {
|
||||||
|
#if ENABLE_PROFILING
|
||||||
|
ASSERT(running);
|
||||||
|
StopTiming();
|
||||||
|
|
||||||
|
if (previous_timer != nullptr)
|
||||||
|
previous_timer->StartTiming();
|
||||||
|
current_timer = previous_timer;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
#if ENABLE_PROFILING
|
||||||
|
void StartTiming() {
|
||||||
|
start = Clock::now();
|
||||||
|
running = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void StopTiming() {
|
||||||
|
auto duration = Clock::now() - start;
|
||||||
|
running = false;
|
||||||
|
category.AddTime(std::chrono::duration_cast<Duration>(duration));
|
||||||
|
}
|
||||||
|
|
||||||
|
Clock::time_point start;
|
||||||
|
bool running = false;
|
||||||
|
|
||||||
|
Timer* previous_timer;
|
||||||
|
static thread_local Timer* current_timer;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
TimingCategory& category;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Timer that automatically starts timing when created and stops at the end of the scope. Should
|
||||||
|
* be used in the majority of cases.
|
||||||
|
*/
|
||||||
|
class ScopeTimer : public Timer {
|
||||||
|
public:
|
||||||
|
ScopeTimer(TimingCategory& category) : Timer(category) {
|
||||||
|
Start();
|
||||||
|
}
|
||||||
|
|
||||||
|
~ScopeTimer() {
|
||||||
|
Stop();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Profiling
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,108 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <chrono>
|
||||||
|
#include <mutex>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/profiler.h"
|
||||||
|
#include "common/synchronized_wrapper.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
namespace Profiling {
|
||||||
|
|
||||||
|
struct TimingCategoryInfo {
|
||||||
|
static const unsigned int NO_PARENT = -1;
|
||||||
|
|
||||||
|
TimingCategory* category;
|
||||||
|
const char* name;
|
||||||
|
unsigned int parent;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ProfilingFrameResult {
|
||||||
|
/// Time since the last delivered frame
|
||||||
|
Duration interframe_time;
|
||||||
|
|
||||||
|
/// Time spent processing a frame, excluding VSync
|
||||||
|
Duration frame_time;
|
||||||
|
|
||||||
|
/// Total amount of time spent inside each category in this frame. Indexed by the category id
|
||||||
|
std::vector<Duration> time_per_category;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ProfilingManager final {
|
||||||
|
public:
|
||||||
|
ProfilingManager();
|
||||||
|
|
||||||
|
unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
|
||||||
|
void SetTimingCategoryParent(unsigned int category, unsigned int parent);
|
||||||
|
|
||||||
|
const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
|
||||||
|
return timing_categories;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This should be called after swapping screen buffers.
|
||||||
|
void BeginFrame();
|
||||||
|
/// This should be called before swapping screen buffers.
|
||||||
|
void FinishFrame();
|
||||||
|
|
||||||
|
/// Get the timing results from the previous frame. This is updated when you call FinishFrame().
|
||||||
|
const ProfilingFrameResult& GetPreviousFrameResults() const {
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<TimingCategoryInfo> timing_categories;
|
||||||
|
Clock::time_point last_frame_end;
|
||||||
|
Clock::time_point this_frame_start;
|
||||||
|
|
||||||
|
ProfilingFrameResult results;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AggregatedDuration {
|
||||||
|
Duration avg, min, max;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AggregatedFrameResult {
|
||||||
|
/// Time since the last delivered frame
|
||||||
|
AggregatedDuration interframe_time;
|
||||||
|
|
||||||
|
/// Time spent processing a frame, excluding VSync
|
||||||
|
AggregatedDuration frame_time;
|
||||||
|
|
||||||
|
float fps;
|
||||||
|
|
||||||
|
/// Total amount of time spent inside each category in this frame. Indexed by the category id
|
||||||
|
std::vector<AggregatedDuration> time_per_category;
|
||||||
|
};
|
||||||
|
|
||||||
|
class TimingResultsAggregator final {
|
||||||
|
public:
|
||||||
|
TimingResultsAggregator(size_t window_size);
|
||||||
|
|
||||||
|
void Clear();
|
||||||
|
void SetNumberOfCategories(size_t n);
|
||||||
|
|
||||||
|
void AddFrame(const ProfilingFrameResult& frame_result);
|
||||||
|
|
||||||
|
AggregatedFrameResult GetAggregatedResults() const;
|
||||||
|
|
||||||
|
size_t max_window_size;
|
||||||
|
size_t window_size;
|
||||||
|
size_t cursor;
|
||||||
|
|
||||||
|
std::vector<Duration> interframe_times;
|
||||||
|
std::vector<Duration> frame_times;
|
||||||
|
std::vector<std::vector<Duration>> times_per_category;
|
||||||
|
};
|
||||||
|
|
||||||
|
ProfilingManager& GetProfilingManager();
|
||||||
|
SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
|
||||||
|
|
||||||
|
} // namespace Profiling
|
||||||
|
} // namespace Common
|
|
@ -0,0 +1,69 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
/**
 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
 * one forgets to lock a mutex before accessing an object. To access the wrapped object construct a
 * SynchronizedRef on this wrapper. Inspired by Rust's Mutex type
 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
 */
template <typename T>
class SynchronizedWrapper {
public:
    /// Constructs the wrapped object in place from the given arguments.
    template <typename... Args>
    SynchronizedWrapper(Args&&... args) :
        data(std::forward<Args>(args)...) {
    }

private:
    template <typename U>
    friend class SynchronizedRef;

    std::mutex mutex;
    T data;
};

/**
 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
 * greatly reduces the chance that someone will access the wrapped resource without locking the
 * mutex.
 *
 * A reference that has been moved from no longer refers to a wrapper and must not be
 * dereferenced.
 */
template <typename T>
class SynchronizedRef {
public:
    /// Locks the wrapper's mutex; it stays locked until this reference is destroyed.
    SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) {
        wrapper.mutex.lock();
    }

    SynchronizedRef(const SynchronizedRef&) = delete;
    /// Takes over the lock held by `o`, leaving `o` empty.
    SynchronizedRef(SynchronizedRef&& o) : wrapper(o.wrapper) {
        o.wrapper = nullptr;
    }

    ~SynchronizedRef() {
        if (wrapper)
            wrapper->mutex.unlock();
    }

    SynchronizedRef& operator=(const SynchronizedRef&) = delete;
    /// Exchanges the held locks with `o`; the lock previously held by this reference is
    /// released when `o` is destroyed.
    SynchronizedRef& operator=(SynchronizedRef&& o) {
        std::swap(wrapper, o.wrapper);
        // The function is declared to return a reference, so it must actually return one;
        // flowing off the end is undefined behavior.
        return *this;
    }

    T& operator*() { return wrapper->data; }
    const T& operator*() const { return wrapper->data; }

    T* operator->() { return &wrapper->data; }
    const T* operator->() const { return &wrapper->data; }

private:
    /// Wrapper whose mutex is currently held, or nullptr after being moved from.
    SynchronizedWrapper<T>* wrapper;
};
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -24,6 +24,25 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Support for C++11's thread_local keyword was surprisingly spotty in compilers until very
|
||||||
|
// recently. Fortunately, thread local variables have been well supported for compilers for a while,
|
||||||
|
// but with semantics supporting only POD types, so we can use a few defines to get some amount of
|
||||||
|
// backwards compat support.
|
||||||
|
// WARNING: This only works correctly with POD types.
|
||||||
|
#if defined(__clang__)
|
||||||
|
# if !__has_feature(cxx_thread_local)
|
||||||
|
# define thread_local __thread
|
||||||
|
# endif
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
|
||||||
|
# define thread_local __thread
|
||||||
|
# endif
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
# if _MSC_VER < 1900
|
||||||
|
# define thread_local __declspec(thread)
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Common
|
namespace Common
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "common/profiler.h"
|
||||||
|
|
||||||
#include "core/mem_map.h"
|
#include "core/mem_map.h"
|
||||||
#include "core/hle/hle.h"
|
#include "core/hle/hle.h"
|
||||||
|
@ -20,6 +21,9 @@
|
||||||
#include "core/arm/skyeye_common/armmmu.h"
|
#include "core/arm/skyeye_common/armmmu.h"
|
||||||
#include "core/arm/skyeye_common/vfp/vfp.h"
|
#include "core/arm/skyeye_common/vfp/vfp.h"
|
||||||
|
|
||||||
|
Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
|
||||||
|
Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
COND = (1 << 0),
|
COND = (1 << 0),
|
||||||
NON_BRANCH = (1 << 1),
|
NON_BRANCH = (1 << 1),
|
||||||
|
@ -3569,6 +3573,8 @@ typedef struct instruction_set_encoding_item ISEITEM;
|
||||||
extern const ISEITEM arm_instruction[];
|
extern const ISEITEM arm_instruction[];
|
||||||
|
|
||||||
static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, addr_t addr) {
|
static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, addr_t addr) {
|
||||||
|
Common::Profiling::ScopeTimer timer_decode(profile_decode);
|
||||||
|
|
||||||
// Decode instruction, get index
|
// Decode instruction, get index
|
||||||
// Allocate memory and init InsCream
|
// Allocate memory and init InsCream
|
||||||
// Go on next, until terminal instruction
|
// Go on next, until terminal instruction
|
||||||
|
@ -3641,6 +3647,8 @@ static bool InAPrivilegedMode(ARMul_State* core) {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned InterpreterMainLoop(ARMul_State* state) {
|
unsigned InterpreterMainLoop(ARMul_State* state) {
|
||||||
|
Common::Profiling::ScopeTimer timer_execute(profile_execute);
|
||||||
|
|
||||||
#undef RM
|
#undef RM
|
||||||
#undef RS
|
#undef RS
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/profiler.h"
|
||||||
|
|
||||||
#include "core/arm/arm_interface.h"
|
#include "core/arm/arm_interface.h"
|
||||||
#include "core/mem_map.h"
|
#include "core/mem_map.h"
|
||||||
#include "core/hle/hle.h"
|
#include "core/hle/hle.h"
|
||||||
|
@ -16,6 +18,8 @@
|
||||||
|
|
||||||
namespace HLE {
|
namespace HLE {
|
||||||
|
|
||||||
|
Common::Profiling::TimingCategory profiler_svc("SVC Calls");
|
||||||
|
|
||||||
static std::vector<ModuleDef> g_module_db;
|
static std::vector<ModuleDef> g_module_db;
|
||||||
|
|
||||||
bool g_reschedule = false; ///< If true, immediately reschedules the CPU to a new thread
|
bool g_reschedule = false; ///< If true, immediately reschedules the CPU to a new thread
|
||||||
|
@ -30,6 +34,8 @@ static const FunctionDef* GetSVCInfo(u32 opcode) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void CallSVC(u32 opcode) {
|
void CallSVC(u32 opcode) {
|
||||||
|
Common::Profiling::ScopeTimer timer_svc(profiler_svc);
|
||||||
|
|
||||||
const FunctionDef *info = GetSVCInfo(opcode);
|
const FunctionDef *info = GetSVCInfo(opcode);
|
||||||
|
|
||||||
if (!info) {
|
if (!info) {
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
#include <boost/range/algorithm/fill.hpp>
|
#include <boost/range/algorithm/fill.hpp>
|
||||||
|
|
||||||
|
#include "common/profiler.h"
|
||||||
|
|
||||||
#include "clipper.h"
|
#include "clipper.h"
|
||||||
#include "command_processor.h"
|
#include "command_processor.h"
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
|
@ -25,6 +27,8 @@ static int float_regs_counter = 0;
|
||||||
|
|
||||||
static u32 uniform_write_buffer[4];
|
static u32 uniform_write_buffer[4];
|
||||||
|
|
||||||
|
Common::Profiling::TimingCategory category_drawing("Drawing");
|
||||||
|
|
||||||
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
|
|
||||||
if (id >= registers.NumIds())
|
if (id >= registers.NumIds())
|
||||||
|
@ -53,6 +57,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
case PICA_REG_INDEX(trigger_draw):
|
case PICA_REG_INDEX(trigger_draw):
|
||||||
case PICA_REG_INDEX(trigger_draw_indexed):
|
case PICA_REG_INDEX(trigger_draw_indexed):
|
||||||
{
|
{
|
||||||
|
Common::Profiling::ScopeTimer scope_timer(category_drawing);
|
||||||
|
|
||||||
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
|
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
|
||||||
|
|
||||||
if (g_debug_context)
|
if (g_debug_context)
|
||||||
|
|
|
@ -4,7 +4,10 @@
|
||||||
|
|
||||||
#include "core/hw/gpu.h"
|
#include "core/hw/gpu.h"
|
||||||
#include "core/mem_map.h"
|
#include "core/mem_map.h"
|
||||||
|
|
||||||
#include "common/emu_window.h"
|
#include "common/emu_window.h"
|
||||||
|
#include "common/profiler_reporting.h"
|
||||||
|
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||||
|
@ -75,9 +78,18 @@ void RendererOpenGL::SwapBuffers() {
|
||||||
|
|
||||||
DrawScreens();
|
DrawScreens();
|
||||||
|
|
||||||
|
auto& profiler = Common::Profiling::GetProfilingManager();
|
||||||
|
profiler.FinishFrame();
|
||||||
|
{
|
||||||
|
auto aggregator = Common::Profiling::GetTimingResultsAggregator();
|
||||||
|
aggregator->AddFrame(profiler.GetPreviousFrameResults());
|
||||||
|
}
|
||||||
|
|
||||||
// Swap buffers
|
// Swap buffers
|
||||||
render_window->PollEvents();
|
render_window->PollEvents();
|
||||||
render_window->SwapBuffers();
|
render_window->SwapBuffers();
|
||||||
|
|
||||||
|
profiler.BeginFrame();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue