From d540d284b5711f044678191bbab858de626103a9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 15 Aug 2021 15:35:53 +0200 Subject: [PATCH] VideoCore: Rework Garbage Collection. --- src/common/lru_cache.h | 141 ++++++++++++++++++ src/video_core/buffer_cache/buffer_base.h | 13 +- src/video_core/buffer_cache/buffer_cache.h | 61 ++++---- src/video_core/texture_cache/image_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 89 ++++------- .../texture_cache/texture_cache_base.h | 8 +- 6 files changed, 213 insertions(+), 101 deletions(-) create mode 100644 src/common/lru_cache.h diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 0000000000..048e9c3daa --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,141 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Common { + +template +class LeastRecentlyUsedCache { + using ObjectType = typename Traits::ObjectType; + using TickType = typename Traits::TickType; + + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + detach(item); + attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast(tick) - static_cast(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + item_pool.emplace_back(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + + void attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque item_pool; + std::deque free_items; + Item* first_item; + Item* last_item; +}; + +} // namespace Common diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c4..4b696a60ff 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,8 @@ public: return words.size_bytes; } + size_t lru_id; + private: template u64* Array() noexcept { @@ -603,7 +595,6 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9a..a0217908ab 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -77,7 +78,7 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 EXPECTED_MEMORY = 512_MiB; + static constexpr u64 EXPECTED_MEMORY = 256_MiB; static constexpr u64 CRITICAL_MEMORY = 1_GiB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -330,7 +331,7 @@ private: template void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr immediate_buffer_alloc; - typename SlotVector::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); - } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template @@ -954,7 +955,7 @@ bool BufferCache

::IsRegionCpuModified(VAddr addr, size_t size) { template void BufferCache

::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache

::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache

::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache

::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache

::Unregister(BufferId buffer_id) { template template void BufferCache

::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.lru_id); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { } template -void BufferCache

::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.lru_id, frame_tick); + } } template diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b3..662089e3de 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = ~0; std::array mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff5..c16cc0838d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,8 +43,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,65 +62,33 @@ template void TextureCache

::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 50ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 50 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template @@ -1078,6 +1044,8 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1083,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1353,7 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb2..d7528ed243 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector uncommitted_downloads; std::queue> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing sentenced_images; DelayedDestructionRing sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector::Iterator deletion_iterator; }; } // namespace VideoCommon