rsx: Optimize surface store for faster scanning

This commit is contained in:
kd-11 2022-03-20 23:03:46 +03:00 committed by kd-11
parent c5748983f6
commit 35ec4de776
5 changed files with 260 additions and 46 deletions

View File

@ -0,0 +1,205 @@
#pragma once
#include <util/types.hpp>
#include "Utilities/address_range.h"
#include <array>
#include <unordered_map>
namespace rsx
{
template<typename T, int BlockSize>
class ranged_map
{
using inner_type = typename std::unordered_map<u32, T>;
using outer_type = typename std::array<inner_type, 0x100000000ull / BlockSize>;
outer_type m_data;
static inline u32 block_for(u32 address)
{
return address / BlockSize;
}
public:
class iterator
{
using super = typename ranged_map<T, BlockSize>;
friend class super;
protected:
inner_type* m_current = nullptr;
inner_type* m_end = nullptr;
outer_type* m_data_ptr = nullptr;
inner_type::iterator m_it{};
inline void forward_scan()
{
while (m_current < m_end)
{
m_it = (++m_current)->begin();
if (m_it != m_current->end()) [[ likely ]]
{
return;
}
}
// end pointer
m_current = nullptr;
m_it = {};
}
void next()
{
if (!m_current)
{
return;
}
if (++m_it != m_current->end()) [[ likely ]]
{
return;
}
forward_scan();
}
inline void begin_range(const utils::address_range& range, inner_type::iterator& where)
{
m_it = where;
m_current = &(*m_data_ptr)[range.start / BlockSize];
m_end = &(*m_data_ptr)[(range.end + 1) / BlockSize];
}
inline void begin_range(u32 address, inner_type::iterator& where)
{
begin_range(utils::address_range::start_length(address, 1), where);
}
inline void begin_range(const utils::address_range& range)
{
m_current = &(*m_data_ptr)[range.start / BlockSize];
m_end = &(*m_data_ptr)[(range.end + 1) / BlockSize];
--m_current;
forward_scan();
}
inline void erase()
{
m_it = m_current->erase(m_it);
if (m_it != m_current->end())
{
return;
}
forward_scan();
}
iterator(super* parent)
: m_data_ptr(&parent->m_data)
{}
public:
inline bool operator == (const iterator& other)
{
return m_it == other.m_it;
}
inline bool operator != (const iterator& other)
{
return m_it != other.m_it;
}
inline auto* operator -> ()
{
ensure(m_current);
return m_it.operator->();
}
inline auto& operator * ()
{
ensure(m_current);
return m_it.operator*();
}
inline auto* operator -> () const
{
ensure(m_current);
return m_it.operator->();
}
inline auto& operator * () const
{
ensure(m_current);
return m_it.operator*();
}
inline iterator& operator ++ ()
{
ensure(m_current);
next();
return *this;
}
inline T& operator ++ (int)
{
ensure(m_current);
auto old = *this;
next();
return old;
}
};
inline T& operator[](const u32& key)
{
return m_data[block_for(key)][key];
}
inline auto find(const u32& key)
{
auto& block = m_data[block_for(key)];
iterator ret = { this };
if (auto found = block.find(key);
found != block.end())
{
ret.begin_range(key, found);
}
return ret;
}
inline iterator erase(iterator& where)
{
where.erase();
return where;
}
inline void erase(u32 address)
{
m_data[block_for(address)].erase(address);
}
inline iterator begin_range(const utils::address_range& range)
{
iterator ret = { this };
ret.begin_range(range);
return ret;
}
inline iterator end()
{
iterator ret = { this };
return ret;
}
inline void clear()
{
for (auto& e : m_data)
{
e.clear();
}
}
};
}

View File

@ -2,6 +2,7 @@
#include "surface_utils.h"
#include "simple_array.hpp"
#include "ranged_map.hpp"
#include "../gcm_enums.h"
#include "../rsx_utils.h"
#include <list>
@ -44,10 +45,11 @@ namespace rsx
using surface_type = typename Traits::surface_type;
using command_list_type = typename Traits::command_list_type;
using surface_overlap_info = surface_overlap_info_t<surface_type>;
using surface_ranged_map = typename rsx::ranged_map<surface_storage_type, 0x100000>;
protected:
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
surface_ranged_map m_render_targets_storage = {};
surface_ranged_map m_depth_stencil_storage = {};
rsx::address_range m_render_targets_memory_range;
rsx::address_range m_depth_stencil_memory_range;
@ -85,7 +87,7 @@ namespace rsx
auto insert_new_surface = [&](
u32 new_address,
deferred_clipped_region<surface_type>& region,
std::unordered_map<u32, surface_storage_type>& data)
surface_ranged_map& data)
{
surface_storage_type sink;
surface_type invalidated = 0;
@ -239,16 +241,16 @@ namespace rsx
void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface, surface_type prev_surface)
{
auto scan_list = [&new_surface, address](const rsx::address_range& mem_range,
std::unordered_map<u32, surface_storage_type>& data) -> std::vector<std::pair<u32, surface_type>>
surface_ranged_map& data) -> std::vector<std::pair<u32, surface_type>>
{
std::vector<std::pair<u32, surface_type>> result;
for (const auto &e : data)
for (auto it = data.begin_range(mem_range); it != data.end(); ++it)
{
auto surface = Traits::get(e.second);
auto surface = Traits::get(it->second);
if (new_surface->last_use_tag >= surface->last_use_tag ||
new_surface == surface ||
address == e.first)
address == it->first)
{
// Do not bother synchronizing with uninitialized data
continue;
@ -257,11 +259,11 @@ namespace rsx
// Memory partition check
if (mem_range.start >= constants::local_mem_base)
{
if (e.first < constants::local_mem_base) continue;
if (it->first < constants::local_mem_base) continue;
}
else
{
if (e.first >= constants::local_mem_base) continue;
if (it->first >= constants::local_mem_base) continue;
}
// Pitch check
@ -277,8 +279,8 @@ namespace rsx
continue;
}
result.push_back({ e.first, surface });
ensure(e.first == surface->base_addr);
result.push_back({ it->first, surface });
ensure(it->first == surface->base_addr);
}
return result;
@ -402,7 +404,7 @@ namespace rsx
bool store = true;
address_range *storage_bounds;
std::unordered_map<u32, surface_storage_type> *primary_storage, *secondary_storage;
surface_ranged_map *primary_storage, *secondary_storage;
if constexpr (depth)
{
primary_storage = &m_depth_stencil_storage;
@ -968,15 +970,15 @@ namespace rsx
const auto test_range = utils::address_range::start_length(texaddr, (required_pitch * required_height) - (required_pitch - surface_internal_pitch));
auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data, bool is_depth)
auto process_list_function = [&](surface_ranged_map& data, bool is_depth)
{
for (auto& tex_info : data)
for (auto it = data.begin_range(test_range); it != data.end(); ++it)
{
const auto range = tex_info.second->get_memory_range();
const auto range = it->second->get_memory_range();
if (!range.overlaps(test_range))
continue;
auto surface = tex_info.second.get();
auto surface = it->second.get();
if (access.is_transfer() && access.is_read() && surface->write_through())
{
// The surface has no data other than what can be loaded from CPU
@ -1150,18 +1152,18 @@ namespace rsx
void invalidate_all()
{
// Unbind and invalidate all resources
auto free_resource_list = [&](auto &data)
auto free_resource_list = [&](auto &data, const utils::address_range& range)
{
for (auto &e : data)
for (auto it = data.begin_range(range); it != data.end(); ++it)
{
invalidate(e.second);
invalidate(it->second);
}
data.clear();
};
free_resource_list(m_render_targets_storage);
free_resource_list(m_depth_stencil_storage);
free_resource_list(m_render_targets_storage, m_render_targets_memory_range);
free_resource_list(m_depth_stencil_storage, m_depth_stencil_memory_range);
ensure(m_active_memory_used == 0);
@ -1175,21 +1177,23 @@ namespace rsx
void invalidate_range(const rsx::address_range& range)
{
for (auto &rtt : m_render_targets_storage)
for (auto it = m_render_targets_storage.begin_range(range); it != m_render_targets_storage.end(); ++it)
{
if (range.overlaps(rtt.second->get_memory_range()))
auto& rtt = it->second;
if (range.overlaps(rtt->get_memory_range()))
{
rtt.second->clear_rw_barrier();
rtt.second->state_flags |= rsx::surface_state_flags::erase_bkgnd;
rtt->clear_rw_barrier();
rtt->state_flags |= rsx::surface_state_flags::erase_bkgnd;
}
}
for (auto &ds : m_depth_stencil_storage)
for (auto it = m_depth_stencil_storage.begin_range(range); it != m_depth_stencil_storage.end(); ++it)
{
if (range.overlaps(ds.second->get_memory_range()))
auto& ds = it->second;
if (range.overlaps(ds->get_memory_range()))
{
ds.second->clear_rw_barrier();
ds.second->state_flags |= rsx::surface_state_flags::erase_bkgnd;
ds->clear_rw_barrier();
ds->state_flags |= rsx::surface_state_flags::erase_bkgnd;
}
}
}
@ -1219,9 +1223,9 @@ namespace rsx
virtual bool handle_memory_pressure(command_list_type cmd, problem_severity severity)
{
auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data)
auto process_list_function = [&](surface_ranged_map& data, const utils::address_range& range)
{
for (auto It = data.begin(); It != data.end();)
for (auto It = data.begin_range(range); It != data.end();)
{
auto surface = Traits::get(It->second);
if (surface->dirty())
@ -1250,8 +1254,8 @@ namespace rsx
const auto old_usage = m_active_memory_used;
// Try and find old surfaces to remove
process_list_function(m_render_targets_storage);
process_list_function(m_depth_stencil_storage);
process_list_function(m_render_targets_storage, m_render_targets_memory_range);
process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range);
return (m_active_memory_used < old_usage);
}

View File

@ -91,12 +91,12 @@ namespace vk
// Drop MSAA resolve/unresolve caches. Only trigger when a hard sync is guaranteed to follow else it will cause even more problems!
// 2-pass to ensure resources are available where they are most needed
auto relieve_memory_pressure = [&](const auto& list)
auto relieve_memory_pressure = [&](auto& list, const utils::address_range& range)
{
for (auto& surface : list)
for (auto it = list.begin_range(range); it != list.end(); ++it)
{
auto& rtt = surface.second;
if (!rtt->spill_request_tag || rtt->spill_request_tag < surface.second->last_rw_access_tag)
auto& rtt = it->second;
if (!rtt->spill_request_tag || rtt->spill_request_tag < rtt->last_rw_access_tag)
{
// We're not going to be spilling into system RAM. If a MSAA resolve target exists, remove it to save memory.
if (rtt->resolve_surface)
@ -151,8 +151,8 @@ namespace vk
}
// 2. Scan the list and spill resources that can be spilled immediately if requested. Also gather resources from those that don't need it.
relieve_memory_pressure(m_render_targets_storage);
relieve_memory_pressure(m_depth_stencil_storage);
relieve_memory_pressure(m_render_targets_storage, m_render_targets_memory_range);
relieve_memory_pressure(m_depth_stencil_storage, m_depth_stencil_memory_range);
// 3. Write to system heap everything marked to spill
for (auto& surface : deferred_spills)
@ -251,22 +251,23 @@ namespace vk
// Very slow, but should only be called when the situation is dire
std::vector<render_target*> sorted_list;
sorted_list.reserve(m_render_targets_storage.size() + m_depth_stencil_storage.size());
sorted_list.reserve(1024);
auto process_list_function = [&](const auto& list)
auto process_list_function = [&](auto& list, const utils::address_range& range)
{
for (auto& surface : list)
for (auto it = list.begin_range(range); it != list.end(); ++it)
{
// NOTE: Check if memory is available instead of value in case we ran out of memory during unspill
if (surface.second->memory && !surface.second->is_bound)
auto& surface = it->second;
if (surface->memory && !surface->is_bound)
{
sorted_list.push_back(surface.second.get());
sorted_list.push_back(surface.get());
}
}
};
process_list_function(m_render_targets_storage);
process_list_function(m_depth_stencil_storage);
process_list_function(m_render_targets_storage, m_render_targets_memory_range);
process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range);
std::sort(sorted_list.begin(), sorted_list.end(), [](const auto& a, const auto& b)
{

View File

@ -480,6 +480,7 @@
<ClInclude Include="Emu\perf_monitor.hpp" />
<ClInclude Include="Emu\RSX\Common\bitfield.hpp" />
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp" />
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />

View File

@ -2062,6 +2062,9 @@
<ClInclude Include="Emu\perf_monitor.hpp">
<Filter>Emu</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">