From ea35619a781d51abbd7cb726e96105aef50ea499 Mon Sep 17 00:00:00 2001
From: TellowKrinkle
Date: Sat, 17 Sep 2022 20:37:49 -0500
Subject: [PATCH] Common: Add class for spinning GPUs

---
Note: the common.vcxproj and common.vcxproj.filters hunks at the end of this
patch contain no XML content in this copy; they are kept as found and must be
regenerated before the patch can be applied.

 common/CMakeLists.txt          |   2 +
 common/ReadbackSpinManager.cpp | 250 +++++++++++++++++++++++++++++++++
 common/ReadbackSpinManager.h   |  67 ++++++++++
 common/common.vcxproj          |   2 +
 common/common.vcxproj.filters  |   6 +
 5 files changed, 327 insertions(+)
 create mode 100644 common/ReadbackSpinManager.cpp
 create mode 100644 common/ReadbackSpinManager.h

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 4765931909..b61227323d 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -27,6 +27,7 @@ target_sources(common PRIVATE
 	PrecompiledHeader.cpp
 	Perf.cpp
 	ProgressCallback.cpp
+	ReadbackSpinManager.cpp
 	Semaphore.cpp
 	SettingsWrapper.cpp
 	StringUtil.cpp
@@ -86,6 +87,7 @@ target_sources(common PRIVATE
 	PageFaultSource.h
 	PrecompiledHeader.h
 	ProgressCallback.h
+	ReadbackSpinManager.h
 	RedtapeWindows.h
 	SafeArray.h
 	ScopedGuard.h
diff --git a/common/ReadbackSpinManager.cpp b/common/ReadbackSpinManager.cpp
new file mode 100644
index 0000000000..90505a8e08
--- /dev/null
+++ b/common/ReadbackSpinManager.cpp
@@ -0,0 +1,250 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2022 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "ReadbackSpinManager.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+/// Readbacks are stored as events with a negative size
+static bool EventIsReadback(const ReadbackSpinManager::Event& event)
+{
+	return event.size < 0;
+}
+
+/// Any event that isn't a readback is a draw
+static bool EventIsDraw(const ReadbackSpinManager::Event& event)
+{
+	return !EventIsReadback(event);
+}
+
+/// An event counts as completed once its begin and end times differ
+static bool IsCompleted(const ReadbackSpinManager::Event& event)
+{
+	return event.begin != event.end;
+}
+
+/// Scores how alike the event lists of two frames are; a higher score means more similar
+static int Similarity(const std::vector<ReadbackSpinManager::Event>& a, const std::vector<ReadbackSpinManager::Event>& b)
+{
+	u32 a_num_readbacks = std::count_if(a.begin(), a.end(), EventIsReadback);
+	u32 b_num_readbacks = std::count_if(b.begin(), b.end(), EventIsReadback);
+
+	// Small penalty for differing event counts (each size is converted before subtracting so the difference is signed)
+	int score = 0x10 - std::abs(static_cast<int>(a.size()) - static_cast<int>(b.size()));
+
+	if (a_num_readbacks == b_num_readbacks)
+		score += 0x10000;
+
+	auto a_idx = a.begin();
+	auto b_idx = b.begin();
+	while (a_idx != a.end() && b_idx != b.end())
+	{
+		if (EventIsReadback(*a_idx) && EventIsReadback(*b_idx))
+		{
+			// Same number of events between readbacks
+			score += 0x1000;
+		}
+		// Try to match up on readbacks
+		else if (EventIsReadback(*a_idx))
+		{
+			b_idx++;
+			continue;
+		}
+		else if (EventIsReadback(*b_idx))
+		{
+			a_idx++;
+			continue;
+		}
+		else if (a_idx->size == b_idx->size)
+		{
+			// Same size
+			score += 0x100;
+		}
+		else if (a_idx->size / 2 <= b_idx->size && b_idx->size / 2 <= a_idx->size)
+		{
+			// Similar size
+			score += 0x10;
+		}
+		a_idx++;
+		b_idx++;
+	}
+	// Both hit the end at the same time
+	if (a_idx == a.end() && b_idx == b.end())
+		score += 0x1000;
+
+	return score;
+}
+
+/// Index of the frame slot before `frame` in a ring of `total_frames` slots
+static u32 PrevFrameNo(u32 frame, size_t total_frames)
+{
+	s32 prev_frame = frame - 1;
+	if (prev_frame < 0)
+		prev_frame = total_frames - 1;
+	return prev_frame;
+}
+
+/// Index of the frame slot after `frame` in a ring of `total_frames` slots
+static u32 NextFrameNo(u32 frame, size_t total_frames)
+{
+	u32 next_frame = frame + 1;
+	if (next_frame >= total_frames)
+		next_frame = 0;
+	return next_frame;
+}
+
+void ReadbackSpinManager::ReadbackRequested()
+{
+	// Record the readback in the current frame
+	Event ev = {};
+	ev.size = -1;
+	m_frames[m_current_frame].push_back(ev);
+
+	// Advance reference frame idx to the next readback
+	while (m_frames[m_reference_frame].size() > m_reference_frame_idx &&
+	       !EventIsReadback(m_frames[m_reference_frame][m_reference_frame_idx]))
+	{
+		m_reference_frame_idx++;
+	}
+	// ...and past it
+	if (m_frames[m_reference_frame].size() > m_reference_frame_idx)
+		m_reference_frame_idx++;
+}
+
+void ReadbackSpinManager::NextFrame()
+{
+	u32 prev_frame_0 = PrevFrameNo(m_current_frame, std::size(m_frames));
+	u32 prev_frame_1 = PrevFrameNo(prev_frame_0, std::size(m_frames));
+	int similarity_0 = Similarity(m_frames[m_current_frame], m_frames[prev_frame_0]);
+	int similarity_1 = Similarity(m_frames[m_current_frame], m_frames[prev_frame_1]);
+
+	// If this frame matched the frame two back better than the previous one, use the
+	// previous frame as the reference for the upcoming frame (presumably the frames
+	// alternate); otherwise use the frame that just finished
+	if (similarity_1 > similarity_0)
+		m_reference_frame = prev_frame_0;
+	else
+		m_reference_frame = m_current_frame;
+	m_reference_frame_idx = 0;
+
+	m_current_frame = NextFrameNo(m_current_frame, std::size(m_frames));
+	m_frames[m_current_frame].clear();
+}
+
+ReadbackSpinManager::DrawSubmittedReturn ReadbackSpinManager::DrawSubmitted(u64 size)
+{
+	DrawSubmittedReturn out = {};
+	u32 idx = m_frames[m_current_frame].size();
+	// id packs the frame slot into the top 4 bits and the event index into the low 28 (decoded in DrawCompleted)
+	out.id = idx | m_current_frame << 28;
+	Event ev = {};
+	ev.size = size;
+	m_frames[m_current_frame].push_back(ev);
+
+	if (m_reference_frame != m_current_frame &&
+	    m_frames[m_reference_frame].size() > m_reference_frame_idx &&
+	    EventIsDraw(m_frames[m_reference_frame][m_reference_frame_idx]))
+	{
+		// Finds the draw after m_reference_frame_idx in `frame`, or failing that the first draw of the frame slot after it
+		auto find_next_draw = [this](u32 frame) -> Event* {
+			auto next = std::find_if(m_frames[frame].begin() + m_reference_frame_idx + 1,
+			                         m_frames[frame].end(),
+			                         EventIsDraw);
+			bool found = next != m_frames[frame].end();
+			if (!found)
+			{
+				u32 next_frame = NextFrameNo(frame, std::size(m_frames));
+				next = std::find_if(m_frames[next_frame].begin(), m_frames[next_frame].end(), EventIsDraw);
+				found = next != m_frames[next_frame].end();
+			}
+			return found ? &*next : nullptr;
+		};
+		Event* cur_draw = &m_frames[m_reference_frame][m_reference_frame_idx];
+		Event* next_draw = find_next_draw(m_reference_frame);
+		const bool is_one_frame_back = m_reference_frame == PrevFrameNo(m_current_frame, std::size(m_frames));
+		if ((!next_draw || !IsCompleted(*cur_draw) || !IsCompleted(*next_draw)) && is_one_frame_back)
+		{
+			// Last frame's timing data hasn't arrived, try the same spot in the frame before
+			u32 two_back = PrevFrameNo(m_reference_frame, std::size(m_frames));
+			if (m_frames[two_back].size() > m_reference_frame_idx &&
+			    EventIsDraw(m_frames[two_back][m_reference_frame_idx]))
+			{
+				cur_draw = &m_frames[two_back][m_reference_frame_idx];
+				next_draw = find_next_draw(two_back);
+			}
+		}
+		if (next_draw && IsCompleted(*cur_draw) && IsCompleted(*next_draw) && m_spins_per_unit_time != 0)
+		{
+			u64 cur_size = cur_draw->size;
+			bool is_similar = cur_size / 2 <= size && size / 2 <= cur_size;
+			if (is_similar) // Only recommend spins if we're somewhat confident in what's going on
+			{
+				s32 current_draw_time = cur_draw->end - cur_draw->begin;
+				s32 gap = next_draw->begin - cur_draw->end;
+				// Give an extra bit of space for the draw to take a bit longer (we'll go with 1/8 longer)
+				s32 fill = gap - (current_draw_time >> 3);
+				if (fill > 0)
+					out.recommended_spin = static_cast<u32>(static_cast<double>(fill) * m_spins_per_unit_time);
+			}
+		}
+
+		m_reference_frame_idx++;
+	}
+
+	if (m_spins_per_unit_time == 0)
+	{
+		// Recommend some spinning so that we can get timing data
+		out.recommended_spin = 128;
+	}
+
+	return out;
+}
+
+void ReadbackSpinManager::DrawCompleted(u32 id, u32 begin_time, u32 end_time)
+{
+	// Unpack the id produced by DrawSubmitted
+	u32 frame_id = id >> 28;
+	u32 frame_off = id & ((1 << 28) - 1);
+	// Ignore ids that don't refer to a stored event (e.g. that frame's list has since been cleared)
+	if (frame_id < std::size(m_frames) && frame_off < m_frames[frame_id].size())
+	{
+		Event& ev = m_frames[frame_id][frame_off];
+		ev.begin = begin_time;
+		ev.end = end_time;
+	}
+}
+
+void ReadbackSpinManager::SpinCompleted(u32 cycles, u32 begin_time, u32 end_time)
+{
+	double elapsed = static_cast<double>(end_time - begin_time);
+	constexpr double decay = 15.0 / 16.0;
+
+	// Obviously it'll vary from GPU to GPU, but in my testing,
+	// both a Radeon Pro 5600M and Intel UHD 630 spin at about 100ns/cycle
+
+	// Note: We assume spin time is some constant times the number of cycles
+	// Obviously as the number of cycles gets really low, a constant offset may start being noticeable
+	// But this is not the case as low as 512 cycles (~50µs) on the GPUs listed above
+
+	m_total_spin_cycles = m_total_spin_cycles * decay + cycles;
+	m_total_spin_time = m_total_spin_time * decay + elapsed;
+	// No time accumulated yet: report "no data" (0) rather than dividing by zero, since an
+	// inf/NaN rate would pass the `!= 0` check in DrawSubmitted and be converted to u32
+	if (m_total_spin_time > 0)
+		m_spins_per_unit_time = m_total_spin_cycles / m_total_spin_time;
+	else
+		m_spins_per_unit_time = 0;
+}
diff --git a/common/ReadbackSpinManager.h b/common/ReadbackSpinManager.h
new file mode 100644
index 0000000000..52b3a46850
--- /dev/null
+++ b/common/ReadbackSpinManager.h
@@ -0,0 +1,67 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2022 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "Pcsx2Defs.h"
+
+#include <vector>
+
+/// A class for calculating optimal spin values to trick OSes into not powering down GPUs while waiting for readbacks
+class ReadbackSpinManager
+{
+public:
+	/// One entry in a frame's event list: either a draw submission or a readback request
+	struct Event
+	{
+		s64 size; ///< Work metric passed to `DrawSubmitted`, or negative for a readback
+		u32 begin; ///< Begin time reported through `DrawCompleted`
+		u32 end; ///< End time reported through `DrawCompleted`
+	};
+
+private:
+	double m_spins_per_unit_time = 0; ///< Decayed spin cycles divided by decayed spin time; 0 until spin timings arrive
+	double m_total_spin_time = 0; ///< Decaying sum of spin durations
+	double m_total_spin_cycles = 0; ///< Decaying sum of spin cycle counts
+	std::vector<Event> m_frames[3]; ///< Event lists for the frame being recorded and the two before it
+	u32 m_current_frame = 0; ///< Slot in `m_frames` being recorded into
+	u32 m_reference_frame = 0; ///< Slot in `m_frames` whose timings are used to recommend spins
+	u32 m_reference_frame_idx = 0; ///< Position in the reference frame lined up with the next submitted event
+
+public:
+	/// Result of `DrawSubmitted`
+	struct DrawSubmittedReturn
+	{
+		u32 id; ///< Pass to `DrawCompleted`
+		u32 recommended_spin; ///< Recommended number of spin cycles to perform on the GPU
+	};
+
+	/// Call when a readback is requested
+	void ReadbackRequested();
+	/// Call at the end of a frame
+	void NextFrame();
+	/// Call when a command buffer is submitted to the GPU
+	/// `size` is used to attempt to find patterns in submissions, and can be any metric that approximates the amount of work in a submission (draw calls, command encoders, etc)
+	/// Returns an id to be passed to `DrawCompleted`, and the recommended number of spin cycles to perform on the GPU in order to keep it busy
+	DrawSubmittedReturn DrawSubmitted(u64 size);
+	/// Call once a draw has been finished by the GPU and you have begin/end data for it
+	/// `begin_time` and `end_time` can be in any unit as long as it's consistent. It's okay if they roll over, as long as it happens less than once every few frames.
+	void DrawCompleted(u32 id, u32 begin_time, u32 end_time);
+	/// Call when a spin completes to help the manager figure out how quickly your GPU spins
+	void SpinCompleted(u32 cycles, u32 begin_time, u32 end_time);
+	/// Get the calculated number of spins per unit of time
+	/// Note: May be zero when there's insufficient data
+	double SpinsPerUnitTime() const { return m_spins_per_unit_time; }
+};
diff --git a/common/common.vcxproj b/common/common.vcxproj
index 1f96255e94..694fb5d6d7 100644
--- a/common/common.vcxproj
+++ b/common/common.vcxproj
@@ -81,6 +81,7 @@ + @@ -177,6 +178,7 @@ +
diff --git a/common/common.vcxproj.filters b/common/common.vcxproj.filters
index 8f2bc35dd2..333dfb8eab 100644
--- a/common/common.vcxproj.filters
+++ b/common/common.vcxproj.filters
@@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files @@ -279,6 +282,9 @@ Header Files + + Header Files + Header Files