From dca3d477c932ba8bce459c9af21d517e3520fc23 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 5 Feb 2022 21:11:56 +0300 Subject: [PATCH] vk: Use image hot-cache for faster allocation times - Creating new images is expensive. - We can keep around a set of images that have been recently discarded and use them instead of creating new ones from scratch each time. --- rpcs3/Emu/RSX/Common/texture_cache.h | 6 + rpcs3/Emu/RSX/VK/VKOverlays.cpp | 7 ++ rpcs3/Emu/RSX/VK/VKResourceManager.h | 17 +++ rpcs3/Emu/RSX/VK/VKTextureCache.cpp | 177 +++++++++++++++------------ rpcs3/Emu/RSX/VK/VKTextureCache.h | 63 +++------- 5 files changed, 143 insertions(+), 127 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index b3317d56b2..0f4058210e 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1021,6 +1021,12 @@ namespace rsx virtual void on_frame_end() { + // Must manually release each cached entry + for (auto& entry : m_temporary_subresource_cache) + { + release_temporary_subresource(entry.second.second); + } + m_temporary_subresource_cache.clear(); m_predictor.on_frame_end(); reset_frame_statistics(); diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index dcef14c537..419e70f959 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -589,6 +589,13 @@ namespace vk for (const auto& _key : keys_to_remove) { + auto& img_data = temp_image_cache[_key]; + auto& view_data = temp_view_cache[_key]; + + auto gc = vk::get_resource_manager(); + gc->dispose(img_data.second); + gc->dispose(view_data); + temp_image_cache.erase(_key); temp_view_cache.erase(_key); } diff --git a/rpcs3/Emu/RSX/VK/VKResourceManager.h b/rpcs3/Emu/RSX/VK/VKResourceManager.h index 8920fb4f94..a54b90ed23 100644 --- a/rpcs3/Emu/RSX/VK/VKResourceManager.h +++ b/rpcs3/Emu/RSX/VK/VKResourceManager.h @@ -14,6 +14,11 @@ namespace vk u64 last_completed_event_id(); void on_event_completed(u64 event_id, bool flush = false); + struct disposable_t + { + virtual void dispose() = 0; + }; + struct eid_scope_t { u64 eid; @@ -24,6 +29,7 @@ namespace vk std::vector> m_disposed_events; std::vector> m_disposed_query_pools; std::vector> m_disposed_samplers; + std::vector> m_disposables; eid_scope_t(u64 _eid): eid(_eid), m_device(g_render_device) @@ -42,6 +48,12 @@ namespace vk m_disposed_images.clear(); m_disposed_query_pools.clear(); m_disposed_samplers.clear(); + + for (auto& disposable : m_disposables) + { + disposable->dispose(); + } + m_disposables.clear(); } }; @@ -185,6 +197,11 @@ namespace vk get_current_eid_scope().m_disposed_samplers.emplace_back(std::move(sampler)); } + void dispose(std::unique_ptr& disposable) + { + get_current_eid_scope().m_disposables.emplace_back(std::move(disposable)); + } + void eid_completed(u64 eid) { while (!m_eid_map.empty()) diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp index bbae8efa41..e913fdf439 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.cpp +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.cpp @@ -7,6 +7,23 @@ namespace vk { + texture_cache::cached_image_reference_t::cached_image_reference_t(texture_cache* parent, std::unique_ptr& previous) + { + this->parent = parent; + this->data = std::move(previous); + } + + void texture_cache::cached_image_reference_t::dispose() + { + // Erase layout information to force TOP_OF_PIPE transition next time. + data->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + // Move this object to the cached image pool + std::lock_guard lock(parent->m_cached_pool_lock); + parent->m_cached_memory_size += data->memory->size(); + parent->m_cached_images.emplace_front(std::move(data)); + } + void cached_texture_section::dma_transfer(vk::command_buffer& cmd, vk::image* src, const areai& src_area, const utils::address_range& valid_range, u32 pitch) { ensure(src->samples() == 1); @@ -167,7 +184,8 @@ namespace vk { if (tex.is_managed()) { - vk::get_resource_manager()->dispose(tex.get_texture()); + auto disposable = std::unique_ptr(new cached_image_reference_t(this, tex.get_texture())); + vk::get_resource_manager()->dispose(disposable); } } @@ -175,8 +193,8 @@ namespace vk { baseclass::clear(); - m_temporary_storage.clear(); - m_temporary_memory_size = 0; + m_cached_images.clear(); + m_cached_memory_size = 0; } void texture_cache::copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) const @@ -449,36 +467,47 @@ namespace vk return result; } - std::unique_ptr texture_cache::find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps) + std::unique_ptr texture_cache::find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps, VkFlags flags) { - for (auto& e : m_temporary_storage) + auto hash_properties = [](VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps, VkFlags flags) { - if (e.can_reuse && e.matches(format, w, h, d, mipmaps, 0)) + ensure(static_cast(format) < 0xFF); + return (static_cast(format) & 0xFF) | + (static_cast(w) << 8) | + (static_cast(h) << 24) | + (static_cast(d) << 40) | + (static_cast(mipmaps) << 48) | + (static_cast(flags) << 56); + }; + + reader_lock lock(m_cached_pool_lock); + + if (!m_cached_images.empty()) + { + const u64 desired_key = hash_properties(format, w, h, d, mipmaps, flags); + lock.upgrade(); + + for (auto it = m_cached_images.begin(); it != m_cached_images.end(); ++it) { - m_temporary_memory_size -= e.block_size; - e.block_size = 0; - e.can_reuse = false; - return std::move(e.combined_image); + const auto& info = (*it)->info; + const u64 this_key = hash_properties(info.format, info.extent.width, info.extent.height, info.extent.depth, info.mipLevels, info.flags); + + if (this_key == desired_key) + { + auto ret = std::move(*it); + m_cached_images.erase(it); + m_cached_memory_size -= ret->memory->size(); + return ret; + } } } return {}; } - std::unique_ptr texture_cache::find_temporary_cubemap(VkFormat format, u16 size) + std::unique_ptr texture_cache::find_cached_cubemap(VkFormat format, u16 size) { - for (auto& e : m_temporary_storage) - { - if (e.can_reuse && e.matches(format, size, size, 1, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) - { - m_temporary_memory_size -= e.block_size; - e.block_size = 0; - e.can_reuse = false; - return std::move(e.combined_image); - } - } - - return {}; + return find_cached_image(format, size, size, 1, 1, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT); } vk::image_view* texture_cache::create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, @@ -492,11 +521,11 @@ namespace vk if (!image_flags) [[likely]] { - image = find_temporary_image(dst_format, w, h, 1, mips); + image = find_cached_image(dst_format, w, h, 1, mips, 0); } else { - image = find_temporary_cubemap(dst_format, w); + image = find_cached_cubemap(dst_format, w); layers = 6; } @@ -551,11 +580,8 @@ namespace vk vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } - const u32 resource_memory = w * h * 4; //Rough approximate - m_temporary_storage.emplace_back(image); - m_temporary_storage.back().block_size = resource_memory; - m_temporary_memory_size += resource_memory; - + // TODO: Floating reference. We can do better with some restructuring. + image.release(); return view; } @@ -715,15 +741,12 @@ namespace vk void texture_cache::release_temporary_subresource(vk::image_view* view) { - auto handle = dynamic_cast(view->image()); - for (auto& e : m_temporary_storage) - { - if (e.combined_image.get() == handle) - { - e.can_reuse = true; - return; - } - } + auto resource = dynamic_cast(view->image()); + ensure(resource); + + auto image = std::unique_ptr(resource); + auto disposable = std::unique_ptr(new cached_image_reference_t(this, image)); + vk::get_resource_manager()->dispose(disposable); } void texture_cache::update_image_contents(vk::command_buffer& cmd, vk::image_view* dst_view, vk::image* src, u16 width, u16 height) @@ -826,13 +849,21 @@ namespace vk { const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; const VkFormat vk_format = get_compatible_sampler_format(m_formats_support, gcm_format); + const VkFlags flags = is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; - image = new vk::viewable_image(*m_device, - m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - image_type, vk_format, - width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0, - VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); + if (auto found = find_cached_image(vk_format, width, height, depth, mipmaps, flags)) + { + image = found.release(); + } + else + { + image = new vk::viewable_image(*m_device, + m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + image_type, vk_format, + width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, usage_flags, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0, + VMM_ALLOCATION_POOL_TEXTURE_CACHE, rsx::classify_format(gcm_format)); + } // New section, we must prepare it region.reset(rsx_range); @@ -1135,14 +1166,14 @@ namespace vk auto any_released = baseclass::handle_memory_pressure(severity); // TODO: This can cause invalidation of in-flight resources - if (severity <= rsx::problem_severity::low || !m_temporary_memory_size) + if (severity <= rsx::problem_severity::low || !m_cached_memory_size) { // Nothing left to do return any_released; } constexpr u64 _1M = 0x100000; - if (severity <= rsx::problem_severity::moderate && m_temporary_memory_size < (64 * _1M)) + if (severity <= rsx::problem_severity::moderate && m_cached_memory_size < (64 * _1M)) { // Some memory is consumed by the temporary resources, but no need to panic just yet return any_released; @@ -1159,20 +1190,9 @@ namespace vk auto gc = vk::get_resource_manager(); u64 actual_released_memory = 0; - for (auto& entry : m_temporary_storage) - { - if (!entry.combined_image) - { - continue; - } + m_cached_images.clear(); + m_cached_memory_size = 0; - actual_released_memory += entry.combined_image->memory->size(); - gc->dispose(entry.combined_image); - m_temporary_memory_size -= entry.block_size; - } - - ensure(m_temporary_memory_size == 0); - m_temporary_storage.clear(); m_temporary_subresource_cache.clear(); rsx_log.warning("Texture cache released %lluM of temporary resources.", (actual_released_memory / _1M)); @@ -1183,27 +1203,27 @@ namespace vk { trim_sections(); - if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects || - m_temporary_memory_size > 0x4000000) //If already holding over 64M in discardable memory, be frugal with memory resources + if (m_storage.m_unreleased_texture_objects >= m_max_zombie_objects) { purge_unreleased_sections(); } - const u64 last_complete_frame = vk::get_last_completed_frame_id(); - m_temporary_storage.remove_if([&](const temporary_storage& o) + if (m_cached_images.size() > max_cached_image_pool_size || + m_cached_memory_size > 256 * 0x100000) { - if (!o.block_size || o.test(last_complete_frame)) - { - m_temporary_memory_size -= o.block_size; - return true; - } - return false; - }); + std::lock_guard lock(m_cached_pool_lock); - m_temporary_subresource_cache.clear(); - reset_frame_statistics(); + const auto new_size = m_cached_images.size() / 2; + for (usz i = new_size; i < m_cached_images.size(); ++i) + { + m_cached_memory_size -= m_cached_images[i]->memory->size(); + } + + m_cached_images.resize(new_size); + } baseclass::on_frame_end(); + reset_frame_statistics(); } vk::viewable_image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch) @@ -1266,10 +1286,9 @@ namespace vk vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); auto result = image.get(); - const u32 resource_memory = width * height * 4; //Rough approximate - m_temporary_storage.emplace_back(image); - m_temporary_storage.back().block_size = resource_memory; - m_temporary_memory_size += resource_memory; + const u32 resource_memory = image->memory->size(); + auto disposable = std::unique_ptr(new cached_image_reference_t(this, image)); + vk::get_resource_manager()->dispose(disposable); return result; } @@ -1294,12 +1313,12 @@ namespace vk u32 texture_cache::get_unreleased_textures_count() const { - return baseclass::get_unreleased_textures_count() + ::size32(m_temporary_storage); + return baseclass::get_unreleased_textures_count() + ::size32(m_cached_images); } u32 texture_cache::get_temporary_memory_in_use() const { - return m_temporary_memory_size; + return m_cached_memory_size; } bool texture_cache::is_overallocated() const diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 61d6db662c..63f0f83e1c 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -350,56 +350,21 @@ namespace vk } }; - struct temporary_storage - { - std::unique_ptr combined_image; - bool can_reuse = false; - - // Memory held by this temp storage object - u32 block_size = 0; - - // Frame id tag - const u64 frame_tag = vk::get_current_frame_id(); - - temporary_storage(std::unique_ptr& _img) - { - combined_image = std::move(_img); - } - - temporary_storage(vk::cached_texture_section& tex) - { - combined_image = std::move(tex.get_texture()); - block_size = tex.get_section_size(); - } - - bool test(u64 ref_frame) const - { - return ref_frame > 0 && frame_tag <= ref_frame; - } - - bool matches(VkFormat format, u16 w, u16 h, u16 d, u16 mipmaps, VkFlags flags) const - { - if (combined_image && - combined_image->info.flags == flags && - combined_image->format() == format && - combined_image->width() == w && - combined_image->height() == h && - combined_image->depth() == d && - combined_image->mipmaps() == mipmaps) - { - return true; - } - - return false; - } - }; - class texture_cache : public rsx::texture_cache { private: using baseclass = rsx::texture_cache; friend baseclass; + struct cached_image_reference_t : vk::disposable_t + { + std::unique_ptr data; + texture_cache* parent; + + cached_image_reference_t(texture_cache* parent, std::unique_ptr& previous); + void dispose() override; + }; + public: enum texture_create_flags : u32 { @@ -418,8 +383,10 @@ namespace vk vk::data_heap* m_texture_upload_heap; //Stuff that has been dereferenced goes into these - std::list m_temporary_storage; - atomic_t m_temporary_memory_size = { 0 }; + const u32 max_cached_image_pool_size = 256; + std::deque> m_cached_images; + atomic_t m_cached_memory_size = { 0 }; + shared_mutex m_cached_pool_lock; void clear(); @@ -429,9 +396,9 @@ namespace vk vk::image* get_template_from_collection_impl(const std::vector& sections_to_transfer) const; - std::unique_ptr find_temporary_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps); + std::unique_ptr find_cached_image(VkFormat format, u16 w, u16 h, u16 d, u8 mipmaps, VkFlags flags); - std::unique_ptr find_temporary_cubemap(VkFormat format, u16 size); + std::unique_ptr find_cached_cubemap(VkFormat format, u16 size); protected: vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,