From aa22d07caf4a36bd37a93e8411103ed4ebb905ba Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 2 Jan 2015 17:26:02 -0800 Subject: [PATCH] Super slow MicroProfile GL UI. --- src/poly/ui/ui_event.h | 18 +- src/poly/ui/win32/win32_control.cc | 9 +- src/xenia/gpu/gl4/circular_buffer.cc | 11 +- src/xenia/gpu/gl4/circular_buffer.h | 3 +- src/xenia/gpu/gl4/command_processor.cc | 19 +- src/xenia/gpu/gl4/gl4_graphics_system.cc | 12 +- src/xenia/gpu/gl4/gl4_profiler_display.cc | 560 ++++++++++++++++++++++ src/xenia/gpu/gl4/gl4_profiler_display.h | 84 ++++ src/xenia/gpu/gl4/gl_context.cc | 5 +- src/xenia/gpu/gl4/sources.gypi | 2 + src/xenia/gpu/gl4/wgl_control.cc | 10 +- src/xenia/profiling.cc | 59 ++- src/xenia/profiling.h | 14 +- third_party/microprofile/microprofileui.h | 2 +- 14 files changed, 748 insertions(+), 60 deletions(-) create mode 100644 src/xenia/gpu/gl4/gl4_profiler_display.cc create mode 100644 src/xenia/gpu/gl4/gl4_profiler_display.h diff --git a/src/poly/ui/ui_event.h b/src/poly/ui/ui_event.h index eb3dfe8bc..641eddf90 100644 --- a/src/poly/ui/ui_event.h +++ b/src/poly/ui/ui_event.h @@ -29,12 +29,16 @@ class UIEvent { class KeyEvent : public UIEvent { public: KeyEvent(Control* control, int key_code) - : UIEvent(control), key_code_(key_code) {} + : UIEvent(control), handled_(false), key_code_(key_code) {} ~KeyEvent() override = default; + bool is_handled() const { return handled_; } + void set_handled(bool value) { handled_ = value; } + int key_code() const { return key_code_; } private: + bool handled_; int key_code_; }; @@ -52,9 +56,18 @@ class MouseEvent : public UIEvent { public: MouseEvent(Control* control, Button button, int32_t x, int32_t y, int32_t dx = 0, int32_t dy = 0) - : UIEvent(control), button_(button), x_(x), y_(y), dx_(dx), dy_(dy) {} + : UIEvent(control), + handled_(false), + button_(button), + x_(x), + y_(y), + dx_(dx), + dy_(dy) {} ~MouseEvent() override = default; + bool is_handled() const { return handled_; } + void set_handled(bool 
value) { handled_ = value; } + Button button() const { return button_; } int32_t x() const { return x_; } int32_t y() const { return y_; } @@ -62,6 +75,7 @@ class MouseEvent : public UIEvent { int32_t dy() const { return dy_; } private: + bool handled_; Button button_; int32_t x_; int32_t y_; diff --git a/src/poly/ui/win32/win32_control.cc b/src/poly/ui/win32/win32_control.cc index c4f888cba..cc75558fc 100644 --- a/src/poly/ui/win32/win32_control.cc +++ b/src/poly/ui/win32/win32_control.cc @@ -342,7 +342,7 @@ bool Win32Control::HandleMouse(UINT message, WPARAM wParam, LPARAM lParam) { OnMouseWheel(e); break; } - return true; + return e.is_handled(); } bool Win32Control::HandleKeyboard(UINT message, WPARAM wParam, LPARAM lParam) { @@ -350,13 +350,12 @@ bool Win32Control::HandleKeyboard(UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case WM_KEYDOWN: OnKeyDown(e); - return true; + break; case WM_KEYUP: OnKeyUp(e); - return true; - default: - return false; + break; } + return e.is_handled(); } } // namespace win32 diff --git a/src/xenia/gpu/gl4/circular_buffer.cc b/src/xenia/gpu/gl4/circular_buffer.cc index d2a342646..92ce0e643 100644 --- a/src/xenia/gpu/gl4/circular_buffer.cc +++ b/src/xenia/gpu/gl4/circular_buffer.cc @@ -20,8 +20,9 @@ namespace gl4 { extern "C" GLEWContext* glewGetContext(); extern "C" WGLEWContext* wglewGetContext(); -CircularBuffer::CircularBuffer(size_t capacity) +CircularBuffer::CircularBuffer(size_t capacity, size_t alignment) : capacity_(capacity), + alignment_(alignment), write_head_(0), buffer_(0), gpu_base_(0), @@ -64,11 +65,11 @@ void CircularBuffer::Shutdown() { CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { // Addresses must always be % 256. 
- length = poly::round_up(length, 256); + size_t aligned_length = poly::round_up(length, alignment_); - assert_true(length <= capacity_, "Request too large"); + assert_true(aligned_length <= capacity_, "Request too large"); - if (write_head_ + length > capacity_) { + if (write_head_ + aligned_length > capacity_) { // Flush and wait. WaitUntilClean(); } @@ -78,7 +79,7 @@ CircularBuffer::Allocation CircularBuffer::Acquire(size_t length) { allocation.gpu_ptr = gpu_base_ + write_head_; allocation.offset = write_head_; allocation.length = length; - write_head_ += length; + write_head_ += aligned_length; return allocation; } diff --git a/src/xenia/gpu/gl4/circular_buffer.h b/src/xenia/gpu/gl4/circular_buffer.h index 987ce746c..777f60274 100644 --- a/src/xenia/gpu/gl4/circular_buffer.h +++ b/src/xenia/gpu/gl4/circular_buffer.h @@ -20,7 +20,7 @@ namespace gl4 { // TODO(benvanik): fences to prevent this from ever flushing. class CircularBuffer { public: - CircularBuffer(size_t capacity); + CircularBuffer(size_t capacity, size_t alignment = 256); ~CircularBuffer(); struct Allocation { @@ -42,6 +42,7 @@ class CircularBuffer { private: size_t capacity_; + size_t alignment_; uintptr_t write_head_; GLuint buffer_; GLuint64 gpu_base_; diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 6f2dd422c..64029c914 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -125,6 +125,7 @@ void CommandProcessor::WorkerMain() { uint32_t write_ptr_index = write_ptr_index_.load(); while (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) { + SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::CommandProcessor::Stall"); // Check if the pointer has moved. // We wait a short bit here to yield time. Since we are also running the // main window display we don't want to pause too long, though. 
@@ -781,6 +782,8 @@ bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + // generate interrupt from the command stream XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", packet_ptr, packet); reader->TraceData(count); @@ -797,6 +800,8 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + auto& regs = *register_file_; PLOGI("XE_SWAP"); @@ -858,6 +863,8 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + // wait until a register or memory location is a specific value XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", packet_ptr, packet); reader->TraceData(count); @@ -1315,6 +1322,7 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; auto& cmd = *draw_command; + auto enable_mode = static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); if (enable_mode == ModeControl::kIgnore) { @@ -1455,6 +1463,7 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { } bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { + SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; auto enable_mode = @@ -1541,15 +1550,15 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { } bool CommandProcessor::UpdateState(DrawCommand* draw_command) { + SCOPE_profile_cpu_f("gpu"); + auto& regs = *register_file_; + auto state_data = draw_command->state_data; + // Much of this state machine is extracted from: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - auto& regs = 
*register_file_; - - auto state_data = draw_command->state_data; - uint32_t mode_control = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; // Window parameters. @@ -2115,7 +2124,6 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) { SCOPE_profile_cpu_f("gpu"); - auto& regs = *register_file_; // VS and PS samplers are shared, but may be used exclusively. @@ -2201,6 +2209,7 @@ bool CommandProcessor::PopulateSampler(DrawCommand* draw_command, } bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { + SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; // This is used to resolve surfaces, taking them from EDRAM render targets diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index a977f2a6f..2edc4277d 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include namespace xe { @@ -43,17 +44,16 @@ X_STATUS GL4GraphicsSystem::Setup() { control_ = std::make_unique(loop); emulator_->main_window()->AddChild(control_.get()); - if (FLAGS_thread_safe_gl) { - control_->context()->MakeCurrent(); - } - // Setup the GL context the command processor will do all its drawing in. // It's shared with the control context so that we can resolve framebuffers // from it. 
processor_context = control_->context()->CreateShared(); - if (FLAGS_thread_safe_gl) { - control_->context()->ClearCurrent(); + { + GLContextLock context_lock(control_->context()); + auto profiler_display = + std::make_unique(control_.get()); + Profiler::set_display(std::move(profiler_display)); } control_ready_fence.Signal(); diff --git a/src/xenia/gpu/gl4/gl4_profiler_display.cc b/src/xenia/gpu/gl4/gl4_profiler_display.cc new file mode 100644 index 000000000..21929ab48 --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_profiler_display.cc @@ -0,0 +1,560 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + +#include +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +extern "C" GLEWContext* glewGetContext(); + +#define MICROPROFILE_MAX_VERTICES (16 << 10) +#define MICROPROFILE_NUM_QUERIES (8 << 10) +#define MAX_FONT_CHARS 256 +#define Q0(d, member, v) d[0].member = v +#define Q1(d, member, v) \ + d[1].member = v; \ + d[3].member = v +#define Q2(d, member, v) d[4].member = v +#define Q3(d, member, v) \ + d[2].member = v; \ + d[5].member = v + +const int FONT_TEX_X = 1024; +const int FONT_TEX_Y = 9; + +const uint8_t profiler_font[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x78, 0x38, 0x78, + 0x7c, 0x7c, 0x3c, 0x44, 0x38, 0x04, 0x44, 0x40, 0x44, 0x44, 0x38, 0x78, + 0x38, 0x78, 0x38, 0x7c, 0x44, 0x44, 0x44, 0x44, 0x44, 0x7c, 0x00, 0x00, + 0x40, 0x00, 0x04, 0x00, 0x18, 0x00, 0x40, 0x10, 0x08, 0x40, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x10, 0x38, 0x7c, 0x08, 0x7c, 0x1c, 0x7c, 0x38, 0x38, + 0x10, 0x28, 0x28, 0x10, 0x00, 0x20, 0x10, 0x08, 0x10, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x20, 0x38, 0x38, 0x70, 0x00, + 0x1c, 0x10, 0x00, 0x1c, 0x10, 0x70, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x28, 0x44, 0x44, 0x44, 0x40, 0x40, 0x40, 0x44, + 0x10, 0x04, 0x48, 0x40, 0x6c, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x10, + 0x44, 0x44, 0x44, 0x44, 0x44, 0x04, 0x00, 0x00, 0x40, 0x00, 0x04, 0x00, + 0x24, 0x00, 0x40, 0x00, 0x00, 0x40, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x30, + 0x44, 0x04, 0x18, 0x40, 0x20, 0x04, 0x44, 0x44, 0x10, 0x28, 0x28, 0x3c, + 0x44, 0x50, 0x10, 0x10, 0x08, 0x54, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x08, 0x00, 0x10, 0x44, 0x44, 0x40, 0x40, 0x04, 0x28, 0x00, 0x30, + 0x10, 0x18, 0x58, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x44, 0x40, 0x44, 0x40, 0x40, 0x40, 0x44, 
0x10, 0x04, 0x50, 0x40, + 0x54, 0x64, 0x44, 0x44, 0x44, 0x44, 0x40, 0x10, 0x44, 0x44, 0x44, 0x28, + 0x28, 0x08, 0x00, 0x38, 0x78, 0x3c, 0x3c, 0x38, 0x20, 0x38, 0x78, 0x30, + 0x18, 0x44, 0x10, 0x6c, 0x78, 0x38, 0x78, 0x3c, 0x5c, 0x3c, 0x3c, 0x44, + 0x44, 0x44, 0x44, 0x44, 0x7c, 0x00, 0x4c, 0x10, 0x04, 0x08, 0x28, 0x78, + 0x40, 0x08, 0x44, 0x44, 0x10, 0x00, 0x7c, 0x50, 0x08, 0x50, 0x00, 0x20, + 0x04, 0x38, 0x10, 0x00, 0x00, 0x00, 0x08, 0x10, 0x10, 0x10, 0x7c, 0x08, + 0x08, 0x54, 0x40, 0x20, 0x04, 0x44, 0x00, 0x30, 0x10, 0x18, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x78, 0x40, 0x44, + 0x78, 0x78, 0x40, 0x7c, 0x10, 0x04, 0x60, 0x40, 0x54, 0x54, 0x44, 0x78, + 0x44, 0x78, 0x38, 0x10, 0x44, 0x44, 0x54, 0x10, 0x10, 0x10, 0x00, 0x04, + 0x44, 0x40, 0x44, 0x44, 0x78, 0x44, 0x44, 0x10, 0x08, 0x48, 0x10, 0x54, + 0x44, 0x44, 0x44, 0x44, 0x60, 0x40, 0x10, 0x44, 0x44, 0x44, 0x28, 0x44, + 0x08, 0x00, 0x54, 0x10, 0x18, 0x18, 0x48, 0x04, 0x78, 0x10, 0x38, 0x3c, + 0x10, 0x00, 0x28, 0x38, 0x10, 0x20, 0x00, 0x20, 0x04, 0x10, 0x7c, 0x00, + 0x7c, 0x00, 0x10, 0x00, 0x00, 0x20, 0x00, 0x04, 0x10, 0x5c, 0x40, 0x10, + 0x04, 0x00, 0x00, 0x60, 0x10, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7c, 0x44, 0x40, 0x44, 0x40, 0x40, 0x4c, 0x44, + 0x10, 0x04, 0x50, 0x40, 0x44, 0x4c, 0x44, 0x40, 0x54, 0x50, 0x04, 0x10, + 0x44, 0x44, 0x54, 0x28, 0x10, 0x20, 0x00, 0x3c, 0x44, 0x40, 0x44, 0x7c, + 0x20, 0x44, 0x44, 0x10, 0x08, 0x70, 0x10, 0x54, 0x44, 0x44, 0x44, 0x44, + 0x40, 0x38, 0x10, 0x44, 0x44, 0x54, 0x10, 0x44, 0x10, 0x00, 0x64, 0x10, + 0x20, 0x04, 0x7c, 0x04, 0x44, 0x20, 0x44, 0x04, 0x10, 0x00, 0x7c, 0x14, + 0x20, 0x54, 0x00, 0x20, 0x04, 0x38, 0x10, 0x10, 
0x00, 0x00, 0x20, 0x10, + 0x10, 0x10, 0x7c, 0x08, 0x10, 0x58, 0x40, 0x08, 0x04, 0x00, 0x00, 0x30, + 0x10, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x44, 0x44, 0x44, 0x40, 0x40, 0x44, 0x44, 0x10, 0x44, 0x48, 0x40, + 0x44, 0x44, 0x44, 0x40, 0x48, 0x48, 0x44, 0x10, 0x44, 0x28, 0x6c, 0x44, + 0x10, 0x40, 0x00, 0x44, 0x44, 0x40, 0x44, 0x40, 0x20, 0x3c, 0x44, 0x10, + 0x08, 0x48, 0x10, 0x54, 0x44, 0x44, 0x44, 0x44, 0x40, 0x04, 0x12, 0x4c, + 0x28, 0x54, 0x28, 0x3c, 0x20, 0x00, 0x44, 0x10, 0x40, 0x44, 0x08, 0x44, + 0x44, 0x20, 0x44, 0x08, 0x00, 0x00, 0x28, 0x78, 0x44, 0x48, 0x00, 0x10, + 0x08, 0x54, 0x10, 0x10, 0x00, 0x00, 0x40, 0x00, 0x10, 0x08, 0x00, 0x10, + 0x00, 0x40, 0x40, 0x04, 0x04, 0x00, 0x00, 0x30, 0x10, 0x18, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x78, 0x38, 0x78, + 0x7c, 0x40, 0x3c, 0x44, 0x38, 0x38, 0x44, 0x7c, 0x44, 0x44, 0x38, 0x40, + 0x34, 0x44, 0x38, 0x10, 0x38, 0x10, 0x44, 0x44, 0x10, 0x7c, 0x00, 0x3c, + 0x78, 0x3c, 0x3c, 0x3c, 0x20, 0x04, 0x44, 0x38, 0x48, 0x44, 0x38, 0x44, + 0x44, 0x38, 0x78, 0x3c, 0x40, 0x78, 0x0c, 0x34, 0x10, 0x6c, 0x44, 0x04, + 0x7c, 0x00, 0x38, 0x38, 0x7c, 0x38, 0x08, 0x38, 0x38, 0x20, 0x38, 0x70, + 0x10, 0x00, 0x28, 0x10, 0x00, 0x34, 0x00, 0x08, 0x10, 0x10, 0x00, 0x20, + 0x00, 0x10, 0x00, 0x00, 0x20, 0x04, 0x00, 0x20, 0x10, 0x3c, 0x70, 0x00, + 0x1c, 0x00, 0x7c, 0x1c, 0x10, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x38, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +GL4ProfilerDisplay::GL4ProfilerDisplay(WGLControl* control) + : control_(control), + program_(0), + vao_(0), + font_texture_(0), + font_handle_(0), + vertex_buffer_(MICROPROFILE_MAX_VERTICES * sizeof(Vertex) * 10, sizeof(Vertex)), + draw_command_count_(0) { + if (!SetupFont() || !SetupState() || !SetupShaders()) { + // Hrm. + assert_always(); + } + + // Pass through mouse events. + control->on_mouse_down.AddListener([](poly::ui::MouseEvent& e) { + Profiler::OnMouseDown(e.button() == poly::ui::MouseEvent::Button::kLeft, + e.button() == poly::ui::MouseEvent::Button::kRight); + e.set_handled(true); + }); + control->on_mouse_up.AddListener([](poly::ui::MouseEvent& e) { + Profiler::OnMouseUp(); + e.set_handled(true); + }); + control->on_mouse_move.AddListener([](poly::ui::MouseEvent& e) { + Profiler::OnMouseMove(e.x(), e.y()); + e.set_handled(true); + }); + control->on_mouse_wheel.AddListener([](poly::ui::MouseEvent& e) { + Profiler::OnMouseWheel(e.x(), e.y(), -e.dy()); + e.set_handled(true); + }); + + // Watch for toggle/mode keys and such. 
+ control->on_key_down.AddListener([](poly::ui::KeyEvent& e) { + Profiler::OnKeyDown(e.key_code()); + e.set_handled(true); + }); + control->on_key_up.AddListener([](poly::ui::KeyEvent& e) { + Profiler::OnKeyUp(e.key_code()); + e.set_handled(true); + }); +} + +bool GL4ProfilerDisplay::SetupFont() { + // Setup font lookup table. + for (uint32_t i = 0; i < poly::countof(font_description_.char_offsets); ++i) { + font_description_.char_offsets[i] = 206; + } + for (uint32_t i = 'A'; i <= 'Z'; ++i) { + font_description_.char_offsets[i] = (i - 'A') * 8 + 1; + } + for (uint32_t i = 'a'; i <= 'z'; ++i) { + font_description_.char_offsets[i] = (i - 'a') * 8 + 217; + } + for (uint32_t i = '0'; i <= '9'; ++i) { + font_description_.char_offsets[i] = (i - '0') * 8 + 433; + } + for (uint32_t i = '!'; i <= '/'; ++i) { + font_description_.char_offsets[i] = (i - '!') * 8 + 513; + } + for (uint32_t i = ':'; i <= '@'; ++i) { + font_description_.char_offsets[i] = (i - ':') * 8 + 625 + 8; + } + for (uint32_t i = '['; i <= '_'; ++i) { + font_description_.char_offsets[i] = (i - '[') * 8 + 681 + 8; + } + for (uint32_t i = '{'; i <= '~'; ++i) { + font_description_.char_offsets[i] = (i - '{') * 8 + 721 + 8; + } + + // Unpack font bitmap into an RGBA texture. + /* Element count, not byte count: one uint32_t RGBA texel per font bit. */ const int UNPACKED_SIZE = FONT_TEX_X * FONT_TEX_Y; + uint32_t unpacked[UNPACKED_SIZE]; + int idx = 0; + int end = FONT_TEX_X * FONT_TEX_Y / 8; + for (int i = 0; i < end; i++) { + uint8_t b = profiler_font[i]; + for (int j = 0; j < 8; ++j) { + unpacked[idx++] = b & 0x80 ? 
0xFFFFFFFFu : 0; + b <<= 1; + } + } + + glCreateTextures(GL_TEXTURE_2D, 1, &font_texture_); + glTextureParameteri(font_texture_, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTextureParameteri(font_texture_, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTextureParameteri(font_texture_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTextureParameteri(font_texture_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTextureStorage2D(font_texture_, 1, GL_RGBA8, FONT_TEX_X, FONT_TEX_Y); + glTextureSubImage2D(font_texture_, 0, 0, 0, FONT_TEX_X, FONT_TEX_Y, GL_RGBA, + GL_UNSIGNED_BYTE, unpacked); + + font_handle_ = glGetTextureHandleARB(font_texture_); + glMakeTextureHandleResidentARB(font_handle_); + + return true; +} + +bool GL4ProfilerDisplay::SetupState() { + if (!vertex_buffer_.Initialize()) { + return false; + } + return true; +} + +bool GL4ProfilerDisplay::SetupShaders() { + const std::string header = + "\n\ +#version 450 \n\ +#extension GL_ARB_bindless_texture : require\n\ +#extension GL_ARB_explicit_uniform_location : require\n\ +#extension GL_ARB_shading_language_420pack : require\n\ +precision highp float; \n\ +precision highp int;\n\ +layout(std140, column_major) uniform;\n\ +layout(std430, column_major) buffer;\n\ +struct VertexData {\n\ + vec4 color; \n\ + vec2 uv; \n\ +};\n\ +"; + const std::string vertex_shader_source = header + + "\n\ +layout(location = 0) uniform mat4 projection_matrix; \n\ +struct VertexFetch { \n\ + vec2 pos; \n\ + vec4 color; \n\ + vec2 uv; \n\ +}; \n\ +layout(location = 0) in VertexFetch vfetch; \n\ +layout(location = 0) out VertexData vtx; \n\ +void main() { \n\ + gl_Position = projection_matrix * vec4(vfetch.pos.xy, 0.0, 1.0); \n\ + vtx.color = vfetch.color; \n\ + vtx.uv = vfetch.uv; \n\ +} \n\ +"; + const std::string fragment_shader_source = header + + "\n\ +layout(location = 1, bindless_sampler) uniform sampler2D font_texture; \n\ +layout(location = 2) uniform float font_height; \n\ +layout(location = 0) in VertexData vtx; \n\ +layout(location = 0) out 
vec4 oC; \n\ +void main() { \n\ + if (vtx.uv.x > 1.0) { \n\ + oC = vtx.color; \n\ + } else { \n\ + vec4 color = texture(font_texture, vtx.uv); \n\ + oC = color.rgba * vtx.color; \n\ + if (color.a < 0.5) { \n\ + vec4 c1 = texture(font_texture, vtx.uv + vec2(0.0, font_height)); \n\ + oC = vec4(0, 0, 0, c1.a); \n\ + } \n\ + } \n\ +} \n\ +"; + + GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER); + const char* vertex_shader_source_ptr = vertex_shader_source.c_str(); + GLint vertex_shader_source_length = GLint(vertex_shader_source.size()); + glShaderSource(vertex_shader, 1, &vertex_shader_source_ptr, + &vertex_shader_source_length); + glCompileShader(vertex_shader); + + GLuint fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); + const char* fragment_shader_source_ptr = fragment_shader_source.c_str(); + GLint fragment_shader_source_length = GLint(fragment_shader_source.size()); + glShaderSource(fragment_shader, 1, &fragment_shader_source_ptr, + &fragment_shader_source_length); + glCompileShader(fragment_shader); + + program_ = glCreateProgram(); + glAttachShader(program_, vertex_shader); + glAttachShader(program_, fragment_shader); + glLinkProgram(program_); + glDeleteShader(vertex_shader); + glDeleteShader(fragment_shader); + + glProgramUniformHandleui64ARB(program_, 1, font_handle_); + glProgramUniform1f(program_, 2, 1.0f / FONT_TEX_Y); + + glCreateVertexArrays(1, &vao_); + glEnableVertexArrayAttrib(vao_, 0); + glVertexArrayAttribBinding(vao_, 0, 0); + glVertexArrayAttribFormat(vao_, 0, 2, GL_FLOAT, GL_FALSE, + offsetof(Vertex, x)); + glEnableVertexArrayAttrib(vao_, 1); + glVertexArrayAttribBinding(vao_, 1, 0); + glVertexArrayAttribFormat(vao_, 1, 4, GL_UNSIGNED_BYTE, GL_TRUE, + offsetof(Vertex, color)); + glEnableVertexArrayAttrib(vao_, 2); + glVertexArrayAttribBinding(vao_, 2, 0); + glVertexArrayAttribFormat(vao_, 2, 2, GL_FLOAT, GL_FALSE, + offsetof(Vertex, u)); + glVertexArrayVertexBuffer(vao_, 0, vertex_buffer_.handle(), 0, + sizeof(Vertex)); + + return 
true; +} + +GL4ProfilerDisplay::~GL4ProfilerDisplay() { + vertex_buffer_.Shutdown(); + glMakeTextureHandleNonResidentARB(font_handle_); + glDeleteTextures(1, &font_texture_); + glDeleteVertexArrays(1, &vao_); + glDeleteProgram(program_); +} + +uint32_t GL4ProfilerDisplay::width() const { return control_->width(); } + +uint32_t GL4ProfilerDisplay::height() const { return control_->height(); } + +void GL4ProfilerDisplay::Begin() { + glEnablei(GL_BLEND, 0); + glBlendFunci(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glDisable(GL_DEPTH_TEST); + + float left = 0.0f; + float right = float(width()); + float bottom = float(height()); + float top = 0.0f; + float z_near = -1.0f; + float z_far = 1.0f; + float projection[16] = {0}; + projection[0] = 2.0f / (right - left); + projection[5] = 2.0f / (top - bottom); + projection[10] = -2.0f / (z_far - z_near); + projection[12] = -(right + left) / (right - left); + projection[13] = -(top + bottom) / (top - bottom); + projection[14] = -(z_far + z_near) / (z_far - z_near); + projection[15] = 1.0f; + glProgramUniformMatrix4fv(program_, 0, 1, GL_FALSE, projection); + + glUseProgram(program_); + glBindVertexArray(vao_); +} + +void GL4ProfilerDisplay::End() { + Flush(); + glUseProgram(0); + glBindVertexArray(0); +} + +GL4ProfilerDisplay::Vertex* GL4ProfilerDisplay::BeginVertices(size_t count) { + if (draw_command_count_ + 1 > kMaxCommands) { + Flush(); + } + current_allocation_ = vertex_buffer_.Acquire(sizeof(Vertex) * count); + return reinterpret_cast(current_allocation_.host_ptr); +} + +void GL4ProfilerDisplay::EndVertices(GLenum prim_type) { + size_t vertex_count = current_allocation_.length / sizeof(Vertex); + + if (false&&draw_command_count_ && + draw_commands_[draw_command_count_ - 1].prim_type == prim_type) { + // Coalesce. 
+ auto& prev_command = draw_commands_[draw_command_count_ - 1]; + prev_command.vertex_count += vertex_count; + } else { + auto& command = draw_commands_[draw_command_count_++]; + command.prim_type = prim_type; + command.vertex_offset = current_allocation_.offset / sizeof(Vertex); + command.vertex_count = vertex_count; + } + + vertex_buffer_.Commit(std::move(current_allocation_)); +} + +void GL4ProfilerDisplay::Flush() { + if (!draw_command_count_) { + return; + } + for (size_t i = 0; i < draw_command_count_; ++i) { + glDrawArrays(draw_commands_[i].prim_type, + GLint(draw_commands_[i].vertex_offset), + GLsizei(draw_commands_[i].vertex_count)); + } + draw_command_count_ = 0; + vertex_buffer_.WaitUntilClean(); +} + +void GL4ProfilerDisplay::DrawBox(int x0, int y0, int x1, int y1, uint32_t color, + BoxType type) { + if (type == BoxType::kFlat) { + color = + ((color & 0xff) << 16) | ((color >> 16) & 0xff) | (0xff00ff00 & color); + auto v = BeginVertices(6); + Q0(v, x, (float)x0); + Q0(v, y, (float)y0); + Q0(v, color, color); + Q0(v, u, 2.0f); + Q0(v, v, 2.0f); + Q1(v, x, (float)x1); + Q1(v, y, (float)y0); + Q1(v, color, color); + Q1(v, u, 2.0f); + Q1(v, v, 2.0f); + Q2(v, x, (float)x1); + Q2(v, y, (float)y1); + Q2(v, color, color); + Q2(v, u, 2.0f); + Q2(v, v, 2.0f); + Q3(v, x, (float)x0); + Q3(v, y, (float)y1); + Q3(v, color, color); + Q3(v, u, 2.0f); + Q3(v, v, 2.0f); + EndVertices(GL_TRIANGLES); + } else { + uint32_t r = 0xff & (color >> 16); + uint32_t g = 0xff & (color >> 8); + uint32_t b = 0xff & color; + uint32_t nMax = std::max(std::max(std::max(r, g), b), 30u); + uint32_t nMin = std::min(std::min(std::min(r, g), b), 180u); + + uint32_t r0 = 0xff & ((r + nMax) / 2); + uint32_t g0 = 0xff & ((g + nMax) / 2); + uint32_t b0 = 0xff & ((b + nMax) / 2); + + uint32_t r1 = 0xff & ((r + nMin) / 2); + uint32_t g1 = 0xff & ((g + nMin) / 2); + uint32_t b1 = 0xff & ((b + nMin) / 2); + uint32_t color0 = (r0 << 0) | (g0 << 8) | (b0 << 16) | (0xff000000 & color); + uint32_t 
color1 = (r1 << 0) | (g1 << 8) | (b1 << 16) | (0xff000000 & color); + auto v = BeginVertices(6); + Q0(v, x, (float)x0); + Q0(v, y, (float)y0); + Q0(v, color, color0); + Q0(v, u, 2.0f); + Q0(v, v, 2.0f); + Q1(v, x, (float)x1); + Q1(v, y, (float)y0); + Q1(v, color, color0); + Q1(v, u, 3.0f); + Q1(v, v, 2.0f); + Q2(v, x, (float)x1); + Q2(v, y, (float)y1); + Q2(v, color, color1); + Q2(v, u, 3.0f); + Q2(v, v, 3.0f); + Q3(v, x, (float)x0); + Q3(v, y, (float)y1); + Q3(v, color, color1); + Q3(v, u, 2.0f); + Q3(v, v, 3.0f); + EndVertices(GL_TRIANGLES); + } +} + +void GL4ProfilerDisplay::DrawLine2D(uint32_t count, float* vertices, + uint32_t color) { + /* A polyline needs at least two points; fewer would queue an empty draw. */ if (count < 2 || !vertices) { + return; + } + auto v = BeginVertices(2 * (count - 1)); + color = 0xff000000 | ((color & 0xff) << 16) | (color & 0xff00ff00) | + ((color >> 16) & 0xff); + for (uint32_t i = 0; i < count - 1; ++i) { + v[0].x = vertices[i * 2]; + v[0].y = vertices[i * 2 + 1]; + v[0].color = color; + v[0].u = 2.0f; + v[0].v = 2.0f; + v[1].x = vertices[(i + 1) * 2]; + v[1].y = vertices[(i + 1) * 2 + 1]; + v[1].color = color; + v[1].u = 2.0f; + v[1].v = 2.0f; + v += 2; + } + EndVertices(GL_LINES); +} + +void GL4ProfilerDisplay::DrawText(int x, int y, uint32_t color, + const char* text, size_t text_length) { + const float fOffsetU = 5.0f / 1024.0f; + float fX = (float)x; + float fY = (float)y; + float fY2 = fY + (MICROPROFILE_TEXT_HEIGHT + 1); + + auto v = BeginVertices(6 * text_length); + const char* pStr = text; + color = 0xff000000 | ((color & 0xff) << 16) | (color & 0xff00) | + ((color >> 16) & 0xff); + + for (size_t j = 0; j < text_length; ++j) { + /* Index as unsigned: plain char may be signed, so bytes >= 0x80 would read before the 256-entry table. */ int16_t char_offset = font_description_.char_offsets[(uint8_t)*pStr++]; + float fOffset = char_offset / 1024.0f; + Q0(v, x, fX); + Q0(v, y, fY); + Q0(v, color, color); + Q0(v, u, fOffset); + Q0(v, v, 0.0f); + + Q1(v, x, fX + MICROPROFILE_TEXT_WIDTH); + Q1(v, y, fY); + Q1(v, color, color); + Q1(v, u, fOffset + fOffsetU); + Q1(v, v, 0.0f); + + Q2(v, x, fX + 
MICROPROFILE_TEXT_WIDTH); + Q2(v, y, fY2); + Q2(v, color, color); + Q2(v, u, fOffset + fOffsetU); + Q2(v, v, 1.0f); + + Q3(v, x, fX); + Q3(v, y, fY2); + Q3(v, color, color); + Q3(v, u, fOffset); + Q3(v, v, 1.0f); + + fX += MICROPROFILE_TEXT_WIDTH + 1; + v += 6; + } + + EndVertices(GL_TRIANGLES); +} + +} // namespace gl4 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_profiler_display.h b/src/xenia/gpu/gl4/gl4_profiler_display.h new file mode 100644 index 000000000..d5290d62f --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_profiler_display.h @@ -0,0 +1,84 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_GL4_GL4_PROFILER_DISPLAY_H_ +#define XENIA_GPU_GL4_GL4_PROFILER_DISPLAY_H_ + +#include +#include +#include +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +class GL4ProfilerDisplay : public ProfilerDisplay { + public: + GL4ProfilerDisplay(WGLControl* control); + virtual ~GL4ProfilerDisplay(); + + uint32_t width() const override; + uint32_t height() const override; + + // TODO(benvanik): GPU timestamping. 
+ + void Begin() override; + void End() override; + void DrawBox(int x0, int y0, int x1, int y1, uint32_t color, + BoxType type) override; + void DrawLine2D(uint32_t count, float* vertices, uint32_t color) override; + void DrawText(int x, int y, uint32_t color, const char* text, + size_t text_length) override; + + private: + struct Vertex { + float x; + float y; + uint32_t color; + float u; + float v; + }; + + bool SetupFont(); + bool SetupState(); + bool SetupShaders(); + + Vertex* BeginVertices(size_t count); + void EndVertices(GLenum prim_type); + void Flush(); + + WGLControl* control_; + GLuint program_; + GLuint vao_; + GLuint font_texture_; + GLuint64 font_handle_; + CircularBuffer vertex_buffer_; + + static const size_t kMaxCommands = 32; + struct { + GLenum prim_type; + size_t vertex_offset; + size_t vertex_count; + } draw_commands_[kMaxCommands]; + uint32_t draw_command_count_; + + CircularBuffer::Allocation current_allocation_; + + struct { + uint16_t char_offsets[256]; + } font_description_; +}; + +} // namespace gl4 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_GL4_GL4_PROFILER_DISPLAY_H_ diff --git a/src/xenia/gpu/gl4/gl_context.cc b/src/xenia/gpu/gl4/gl_context.cc index 76824f4d1..d5e39fa34 100644 --- a/src/xenia/gpu/gl4/gl_context.cc +++ b/src/xenia/gpu/gl4/gl_context.cc @@ -289,8 +289,8 @@ void GLContext::SetupDebugging() { // intended to be used as an offset into a buffer object? }; glDebugMessageControl(GL_DEBUG_SOURCE_API, GL_DEBUG_TYPE_OTHER, GL_DONT_CARE, - poly::countof(disable_message_ids), disable_message_ids, - GL_FALSE); + GLsizei(poly::countof(disable_message_ids)), + disable_message_ids, GL_FALSE); // Callback will be made from driver threads. 
glDebugMessageCallback(reinterpret_cast(&DebugMessageThunk), @@ -298,6 +298,7 @@ void GLContext::SetupDebugging() { } bool GLContext::MakeCurrent() { + SCOPE_profile_cpu_f("gpu"); if (FLAGS_thread_safe_gl) { global_gl_mutex_.lock(); } diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index bdeeae80a..efc52f2b6 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -10,6 +10,8 @@ 'gl4_gpu.h', 'gl4_graphics_system.cc', 'gl4_graphics_system.h', + 'gl4_profiler_display.cc', + 'gl4_profiler_display.h', 'gl4_shader.cc', 'gl4_shader.h', 'gl4_shader_translator.cc', diff --git a/src/xenia/gpu/gl4/wgl_control.cc b/src/xenia/gpu/gl4/wgl_control.cc index dacc22890..ce02f7b2f 100644 --- a/src/xenia/gpu/gl4/wgl_control.cc +++ b/src/xenia/gpu/gl4/wgl_control.cc @@ -48,7 +48,7 @@ bool WGLControl::Create() { } // Create window. - DWORD window_style = WS_CHILD | WS_VISIBLE; + DWORD window_style = WS_CHILD | WS_VISIBLE | SS_NOTIFY; DWORD window_ex_style = 0; hwnd_ = CreateWindowEx(window_ex_style, L"XeniaWglClass", L"Xenia", window_style, @@ -64,6 +64,8 @@ bool WGLControl::Create() { return false; } + SetFocus(hwnd_); + OnCreate(); return true; } @@ -74,6 +76,7 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case WM_PAINT: { + SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::WGLControl::WM_PAINT"); { GLContextLock context_lock(&context_); wglSwapIntervalEXT(0); @@ -100,7 +103,10 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam, glClearNamedFramebufferfv(0, GL_COLOR, 0, red); glDisable(GL_SCISSOR_TEST); } - SwapBuffers(context_.dc()); + { + SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::WGLControl::SwapBuffers"); + SwapBuffers(context_.dc()); + } } break; } return Win32Control::WndProc(hWnd, message, wParam, lParam); diff --git a/src/xenia/profiling.cc b/src/xenia/profiling.cc index a8a47dcec..2267980cc 100644 --- a/src/xenia/profiling.cc +++ 
b/src/xenia/profiling.cc @@ -7,17 +7,21 @@ ****************************************************************************** */ -#define MICROPROFILE_IMPL +#include + +#define MICROPROFILE_ENABLED 1 +#define MICROPROFILEUI_ENABLED 1 +#define MICROPROFILE_IMPL 1 +#define MICROPROFILEUI_IMPL 1 #define MICROPROFILE_USE_THREAD_NAME_CALLBACK 1 #define MICROPROFILE_PRINTF PLOGI +#define MICROPROFILE_WEBSERVER 0 +#define MICROPROFILE_DEBUG 0 #include +#include #include -#if XE_OPTION_PROFILING_UI -#include -#endif // XE_OPTION_PROFILING_UI - namespace xe { std::unique_ptr Profiler::display_ = nullptr; @@ -25,16 +29,38 @@ std::unique_ptr Profiler::display_ = nullptr; #if XE_OPTION_PROFILING void Profiler::Initialize() { - MicroProfileSetForceEnable(true); - MicroProfileSetEnableAllGroups(true); - MicroProfileSetForceMetaCounters(true); + // Custom groups. + MicroProfileSetEnableAllGroups(false); + MicroProfileForceEnableGroup("alloy", MicroProfileTokenTypeCpu); + MicroProfileForceEnableGroup("apu", MicroProfileTokenTypeCpu); + MicroProfileForceEnableGroup("cpu", MicroProfileTokenTypeCpu); + MicroProfileForceEnableGroup("gpu", MicroProfileTokenTypeCpu); + MicroProfileForceEnableGroup("internal", MicroProfileTokenTypeCpu); + g_MicroProfile.nGroupMask = g_MicroProfile.nForceGroup; + g_MicroProfile.nActiveGroup = g_MicroProfile.nActiveGroupWanted = + g_MicroProfile.nGroupMask; + + // Custom timers: time, average. 
+ g_MicroProfile.nBars |= 0x1 | 0x2; + g_MicroProfile.nActiveBars |= 0x1 | 0x2; + #if XE_OPTION_PROFILING_UI MicroProfileInitUI(); + g_MicroProfileUI.bShowSpikes = true; + g_MicroProfileUI.nOpacityBackground = 0x40 << 24; + g_MicroProfileUI.nOpacityForeground = 0xc0 << 24; MicroProfileSetDisplayMode(1); +#else + MicroProfileSetForceEnable(true); + MicroProfileSetEnableAllGroups(true); + MicroProfileSetForceMetaCounters(false); #endif // XE_OPTION_PROFILING_UI } void Profiler::Dump() { +#if XE_OPTION_PROFILING_UI + MicroProfileDumpTimers(); +#endif // XE_OPTION_PROFILING_UI MicroProfileDumpHtml("profile.html"); MicroProfileDumpHtmlToFile(); } @@ -116,29 +142,14 @@ void Profiler::set_display(std::unique_ptr display) { } void Profiler::Present() { + SCOPE_profile_cpu_f("internal"); MicroProfileFlip(); #if XE_OPTION_PROFILING_UI if (!display_) { return; } - float left = 0.f; - float right = display_->width(); - float bottom = display_->height(); - float top = 0.f; - float near = -1.f; - float far = 1.f; - float projection[16] = {0}; - projection[0] = 2.0f / (right - left); - projection[5] = 2.0f / (top - bottom); - projection[10] = -2.0f / (far - near); - projection[12] = -(right + left) / (right - left); - projection[13] = -(top + bottom) / (top - bottom); - projection[14] = -(far + near) / (far - near); - projection[15] = 1.f; display_->Begin(); - MicroProfileBeginDraw(display_->width(), display_->height(), projection); MicroProfileDraw(display_->width(), display_->height()); - MicroProfileEndDraw(); display_->End(); #endif // XE_OPTION_PROFILING_UI } diff --git a/src/xenia/profiling.h b/src/xenia/profiling.h index 4c18b4bac..2428c78d3 100644 --- a/src/xenia/profiling.h +++ b/src/xenia/profiling.h @@ -16,7 +16,7 @@ #define XE_OPTION_PROFILING 1 #if XE_LIKE_WIN32 -//#define XE_OPTION_PROFILING_UI 1 +#define XE_OPTION_PROFILING_UI 1 #endif // XE_LIKE_WIN32 #if XE_OPTION_PROFILING @@ -122,13 +122,13 @@ namespace xe { class ProfilerDisplay { public: - enum BoxType { + 
enum class BoxType { #if XE_OPTION_PROFILING - BOX_TYPE_BAR = MicroProfileBoxTypeBar, - BOX_TYPE_FLAT = MicroProfileBoxTypeFlat, + kBar = MicroProfileBoxTypeBar, + kFlat = MicroProfileBoxTypeFlat, #else - BOX_TYPE_BAR, - BOX_TYPE_FLAT, + kBar, + kFlat, #endif // XE_OPTION_PROFILING }; @@ -139,7 +139,7 @@ class ProfilerDisplay { virtual void Begin() = 0; virtual void End() = 0; - virtual void DrawBox(int x, int y, int x1, int y1, uint32_t color, + virtual void DrawBox(int x0, int y0, int x1, int y1, uint32_t color, BoxType type) = 0; virtual void DrawLine2D(uint32_t count, float* vertices, uint32_t color) = 0; virtual void DrawText(int x, int y, uint32_t color, const char* text, diff --git a/third_party/microprofile/microprofileui.h b/third_party/microprofile/microprofileui.h index bce3904dd..b84bf8c9c 100644 --- a/third_party/microprofile/microprofileui.h +++ b/third_party/microprofile/microprofileui.h @@ -592,7 +592,7 @@ void MicroProfileCenter(int64_t nTickCenter) float fCenter = MicroProfileLogTickDifference(nStart, nTickCenter) * fToMs; UI.fDetailedOffsetTarget = UI.fDetailedOffset = fCenter - 0.5f * UI.fDetailedRange; } -#ifdef MICROPROFILE_DEBUG +#if MICROPROFILE_DEBUG uint64_t* g_pMicroProfileDumpStart = 0; uint64_t* g_pMicroProfileDumpEnd = 0; void MicroProfileDebugDumpRange()