diff --git a/rpcs3/Emu/RSX/Common/ranged_map.hpp b/rpcs3/Emu/RSX/Common/ranged_map.hpp
new file mode 100644
index 0000000000..24eb4650af
--- /dev/null
+++ b/rpcs3/Emu/RSX/Common/ranged_map.hpp
@@ -0,0 +1,245 @@
+#pragma once
+
+#include <util/types.hpp>
+#include "Utilities/address_range.h"
+
+#include <array>
+#include <unordered_map>
+
+namespace rsx
+{
+	/**
+	 * Sparse map from u32 keys to T that buckets its entries into fixed-size blocks of the key space.
+	 * Every block owns its own std::unordered_map, so a walk started with begin_range() only touches
+	 * the blocks spanned by the requested range instead of every entry in the container.
+	 *
+	 * NOTE: Ranged iteration is block-granular. It yields every entry stored in a visited block, including
+	 * entries whose key lies outside of the requested range, so callers still have to filter the results.
+	 */
+	template <typename T, int BlockSize>
+	class ranged_map
+	{
+		static_assert(BlockSize > 0, "BlockSize must be positive");
+
+		// Number of blocks needed to cover the whole 32-bit key space, rounded up so the last block always exists
+		static constexpr u64 block_count = (0x100000000ull + BlockSize - 1) / BlockSize;
+
+		using inner_type = std::unordered_map<u32, T>;
+		using outer_type = std::array<inner_type, block_count>;
+
+		outer_type m_data;
+
+		// Index of the block that stores the given key
+		static inline u32 block_for(u32 address)
+		{
+			return address / BlockSize;
+		}
+
+	public:
+		/**
+		 * Forward iterator over the entries of a contiguous run of blocks.
+		 * An iterator whose current block is null is the end iterator.
+		 */
+		class iterator
+		{
+			using super = ranged_map<T, BlockSize>;
+			using inner_iterator = typename inner_type::iterator;
+			friend super;
+
+		protected:
+			inner_type* m_current = nullptr;  // Block being walked, nullptr once the walk is over
+			inner_type* m_end = nullptr;      // Last block (inclusive) this iterator is allowed to visit
+
+			outer_type* m_data_ptr = nullptr;
+			inner_iterator m_it{};
+
+			// Turns this object into the end iterator
+			inline void reset()
+			{
+				m_current = nullptr;
+				m_it = {};
+			}
+
+			// The current block is exhausted; move on to the first entry of the next non-empty block, if any
+			inline void forward_scan()
+			{
+				while (m_current < m_end)
+				{
+					m_it = (++m_current)->begin();
+					if (m_it != m_current->end()) [[ likely ]]
+					{
+						return;
+					}
+				}
+
+				reset();
+			}
+
+			void next()
+			{
+				if (!m_current)
+				{
+					return;
+				}
+
+				if (++m_it != m_current->end()) [[ likely ]]
+				{
+					return;
+				}
+
+				forward_scan();
+			}
+
+			// Starts at a known entry ('where' must point into the block of range.start) and stops after the block of range.end
+			inline void begin_range(const utils::address_range& range, inner_iterator& where)
+			{
+				m_it = where;
+				m_current = &(*m_data_ptr)[block_for(range.start)];
+				m_end = &(*m_data_ptr)[block_for(range.end)];
+			}
+
+			// Starts at a known entry and never leaves the block that holds 'address'
+			inline void begin_range(u32 address, inner_iterator& where)
+			{
+				m_it = where;
+				m_current = &(*m_data_ptr)[block_for(address)];
+				m_end = m_current;
+			}
+
+			// Starts at the first entry found in the blocks spanned by 'range'
+			inline void begin_range(const utils::address_range& range)
+			{
+				m_current = &(*m_data_ptr)[block_for(range.start)];
+				m_end = &(*m_data_ptr)[block_for(range.end)];
+
+				if (m_current > m_end)
+				{
+					// Inverted range, there is no block to visit
+					reset();
+					return;
+				}
+
+				m_it = m_current->begin();
+				if (m_it == m_current->end())
+				{
+					forward_scan();
+				}
+			}
+
+			// Removes the current entry and moves to the one that follows it
+			inline void erase()
+			{
+				ensure(m_current);
+				m_it = m_current->erase(m_it);
+				if (m_it != m_current->end())
+				{
+					return;
+				}
+
+				forward_scan();
+			}
+
+			explicit iterator(super* parent)
+				: m_data_ptr(&parent->m_data)
+			{}
+
+		public:
+			inline bool operator == (const iterator& other) const
+			{
+				if (m_current != other.m_current)
+				{
+					return false;
+				}
+
+				// End iterators hold no valid inner iterator, so only compare positions inside a live block
+				return !m_current || m_it == other.m_it;
+			}
+
+			inline bool operator != (const iterator& other) const
+			{
+				return !(*this == other);
+			}
+
+			inline auto* operator -> () const
+			{
+				ensure(m_current);
+				return m_it.operator->();
+			}
+
+			inline auto& operator * () const
+			{
+				ensure(m_current);
+				return m_it.operator*();
+			}
+
+			inline iterator& operator ++ ()
+			{
+				ensure(m_current);
+				next();
+				return *this;
+			}
+
+			// Post-increment hands back a copy of the position held before advancing
+			inline iterator operator ++ (int)
+			{
+				ensure(m_current);
+				auto old = *this;
+				next();
+				return old;
+			}
+		};
+
+		inline T& operator[](const u32& key)
+		{
+			return m_data[block_for(key)][key];
+		}
+
+		// Returns an iterator to the entry stored under 'key', or end() if there is none
+		inline iterator find(const u32& key)
+		{
+			auto& block = m_data[block_for(key)];
+			iterator ret(this);
+
+			if (auto found = block.find(key);
+				found != block.end())
+			{
+				ret.begin_range(key, found);
+			}
+
+			return ret;
+		}
+
+		// Erases the entry 'where' points at; 'where' itself is advanced and a copy of it is returned
+		inline iterator erase(iterator& where)
+		{
+			where.erase();
+			return where;
+		}
+
+		inline void erase(u32 address)
+		{
+			m_data[block_for(address)].erase(address);
+		}
+
+		// Returns an iterator over the entries of every block that 'range' spans, or end() if they are all empty
+		inline iterator begin_range(const utils::address_range& range)
+		{
+			iterator ret(this);
+			ret.begin_range(range);
+			return ret;
+		}
+
+		inline iterator end()
+		{
+			return iterator(this);
+		}
+
+		inline void clear()
+		{
+			for (auto& e : m_data)
+			{
+				e.clear();
+			}
+		}
+	};
+}
diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index cabce9bd3f..67d0b37544 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -2,6 +2,7 @@ #include "surface_utils.h" #include "simple_array.hpp" +#include "ranged_map.hpp" #include "../gcm_enums.h" #include "../rsx_utils.h" #include @@ -44,10 +45,11 @@ namespace rsx using surface_type = typename Traits::surface_type; using command_list_type = typename 
Traits::command_list_type; using surface_overlap_info = surface_overlap_info_t; + using surface_ranged_map = typename rsx::ranged_map; protected: - std::unordered_map m_render_targets_storage = {}; - std::unordered_map m_depth_stencil_storage = {}; + surface_ranged_map m_render_targets_storage = {}; + surface_ranged_map m_depth_stencil_storage = {}; rsx::address_range m_render_targets_memory_range; rsx::address_range m_depth_stencil_memory_range; @@ -85,7 +87,7 @@ namespace rsx auto insert_new_surface = [&]( u32 new_address, deferred_clipped_region& region, - std::unordered_map& data) + surface_ranged_map& data) { surface_storage_type sink; surface_type invalidated = 0; @@ -239,16 +241,16 @@ namespace rsx void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface, surface_type prev_surface) { auto scan_list = [&new_surface, address](const rsx::address_range& mem_range, - std::unordered_map& data) -> std::vector> + surface_ranged_map& data) -> std::vector> { std::vector> result; - for (const auto &e : data) + for (auto it = data.begin_range(mem_range); it != data.end(); ++it) { - auto surface = Traits::get(e.second); + auto surface = Traits::get(it->second); if (new_surface->last_use_tag >= surface->last_use_tag || new_surface == surface || - address == e.first) + address == it->first) { // Do not bother synchronizing with uninitialized data continue; @@ -257,11 +259,11 @@ namespace rsx // Memory partition check if (mem_range.start >= constants::local_mem_base) { - if (e.first < constants::local_mem_base) continue; + if (it->first < constants::local_mem_base) continue; } else { - if (e.first >= constants::local_mem_base) continue; + if (it->first >= constants::local_mem_base) continue; } // Pitch check @@ -277,8 +279,8 @@ namespace rsx continue; } - result.push_back({ e.first, surface }); - ensure(e.first == surface->base_addr); + result.push_back({ it->first, surface }); + ensure(it->first == surface->base_addr); } return result; @@ 
-402,7 +404,7 @@ namespace rsx bool store = true; address_range *storage_bounds; - std::unordered_map *primary_storage, *secondary_storage; + surface_ranged_map *primary_storage, *secondary_storage; if constexpr (depth) { primary_storage = &m_depth_stencil_storage; @@ -968,15 +970,15 @@ namespace rsx const auto test_range = utils::address_range::start_length(texaddr, (required_pitch * required_height) - (required_pitch - surface_internal_pitch)); - auto process_list_function = [&](std::unordered_map& data, bool is_depth) + auto process_list_function = [&](surface_ranged_map& data, bool is_depth) { - for (auto& tex_info : data) + for (auto it = data.begin_range(test_range); it != data.end(); ++it) { - const auto range = tex_info.second->get_memory_range(); + const auto range = it->second->get_memory_range(); if (!range.overlaps(test_range)) continue; - auto surface = tex_info.second.get(); + auto surface = it->second.get(); if (access.is_transfer() && access.is_read() && surface->write_through()) { // The surface has no data other than what can be loaded from CPU @@ -1150,18 +1152,18 @@ namespace rsx void invalidate_all() { // Unbind and invalidate all resources - auto free_resource_list = [&](auto &data) + auto free_resource_list = [&](auto &data, const utils::address_range& range) { - for (auto &e : data) + for (auto it = data.begin_range(range); it != data.end(); ++it) { - invalidate(e.second); + invalidate(it->second); } data.clear(); }; - free_resource_list(m_render_targets_storage); - free_resource_list(m_depth_stencil_storage); + free_resource_list(m_render_targets_storage, m_render_targets_memory_range); + free_resource_list(m_depth_stencil_storage, m_depth_stencil_memory_range); ensure(m_active_memory_used == 0); @@ -1175,21 +1177,23 @@ namespace rsx void invalidate_range(const rsx::address_range& range) { - for (auto &rtt : m_render_targets_storage) + for (auto it = m_render_targets_storage.begin_range(range); it != m_render_targets_storage.end(); ++it) { 
- if (range.overlaps(rtt.second->get_memory_range())) + auto& rtt = it->second; + if (range.overlaps(rtt->get_memory_range())) { - rtt.second->clear_rw_barrier(); - rtt.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + rtt->clear_rw_barrier(); + rtt->state_flags |= rsx::surface_state_flags::erase_bkgnd; } } - for (auto &ds : m_depth_stencil_storage) + for (auto it = m_depth_stencil_storage.begin_range(range); it != m_depth_stencil_storage.end(); ++it) { - if (range.overlaps(ds.second->get_memory_range())) + auto& ds = it->second; + if (range.overlaps(ds->get_memory_range())) { - ds.second->clear_rw_barrier(); - ds.second->state_flags |= rsx::surface_state_flags::erase_bkgnd; + ds->clear_rw_barrier(); + ds->state_flags |= rsx::surface_state_flags::erase_bkgnd; } } } @@ -1219,9 +1223,9 @@ namespace rsx virtual bool handle_memory_pressure(command_list_type cmd, problem_severity severity) { - auto process_list_function = [&](std::unordered_map& data) + auto process_list_function = [&](surface_ranged_map& data, const utils::address_range& range) { - for (auto It = data.begin(); It != data.end();) + for (auto It = data.begin_range(range); It != data.end();) { auto surface = Traits::get(It->second); if (surface->dirty()) @@ -1250,8 +1254,8 @@ namespace rsx const auto old_usage = m_active_memory_used; // Try and find old surfaces to remove - process_list_function(m_render_targets_storage); - process_list_function(m_depth_stencil_storage); + process_list_function(m_render_targets_storage, m_render_targets_memory_range); + process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range); return (m_active_memory_used < old_usage); } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp index 0f23f5c901..5468671264 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp @@ -91,12 +91,12 @@ namespace vk // Drop MSAA resolve/unresolve caches. 
Only trigger when a hard sync is guaranteed to follow else it will cause even more problems! // 2-pass to ensure resources are available where they are most needed - auto relieve_memory_pressure = [&](const auto& list) + auto relieve_memory_pressure = [&](auto& list, const utils::address_range& range) { - for (auto& surface : list) + for (auto it = list.begin_range(range); it != list.end(); ++it) { - auto& rtt = surface.second; - if (!rtt->spill_request_tag || rtt->spill_request_tag < surface.second->last_rw_access_tag) + auto& rtt = it->second; + if (!rtt->spill_request_tag || rtt->spill_request_tag < rtt->last_rw_access_tag) { // We're not going to be spilling into system RAM. If a MSAA resolve target exists, remove it to save memory. if (rtt->resolve_surface) @@ -151,8 +151,8 @@ namespace vk } // 2. Scan the list and spill resources that can be spilled immediately if requested. Also gather resources from those that don't need it. - relieve_memory_pressure(m_render_targets_storage); - relieve_memory_pressure(m_depth_stencil_storage); + relieve_memory_pressure(m_render_targets_storage, m_render_targets_memory_range); + relieve_memory_pressure(m_depth_stencil_storage, m_depth_stencil_memory_range); // 3. 
Write to system heap everything marked to spill for (auto& surface : deferred_spills) @@ -251,22 +251,23 @@ namespace vk // Very slow, but should only be called when the situation is dire std::vector sorted_list; - sorted_list.reserve(m_render_targets_storage.size() + m_depth_stencil_storage.size()); + sorted_list.reserve(1024); - auto process_list_function = [&](const auto& list) + auto process_list_function = [&](auto& list, const utils::address_range& range) { - for (auto& surface : list) + for (auto it = list.begin_range(range); it != list.end(); ++it) { // NOTE: Check if memory is available instead of value in case we ran out of memory during unspill - if (surface.second->memory && !surface.second->is_bound) + auto& surface = it->second; + if (surface->memory && !surface->is_bound) { - sorted_list.push_back(surface.second.get()); + sorted_list.push_back(surface.get()); } } }; - process_list_function(m_render_targets_storage); - process_list_function(m_depth_stencil_storage); + process_list_function(m_render_targets_storage, m_render_targets_memory_range); + process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range); std::sort(sorted_list.begin(), sorted_list.end(), [](const auto& a, const auto& b) { diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 07333f9e76..d81306c4f1 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -480,6 +480,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 2a9cd319ed..e7c753b424 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2062,6 +2062,9 @@ Emu + + Emu\GPU\RSX\Common +