vk: Try to spread memory usage evenly across compatible types if possible

- Avoids running into budget constraints if you just dump everything into one heap
This commit is contained in:
kd-11 2021-07-25 22:14:47 +03:00 committed by kd-11
parent 6a9d1edee1
commit 13abe785a9
5 changed files with 87 additions and 11 deletions

View File

@ -109,6 +109,7 @@ void VKGSRender::advance_queued_frames()
check_present_status(); check_present_status();
// Run video memory balancer // Run video memory balancer
m_device->rebalance_memory_type_usage();
vk::vmm_check_memory_usage(); vk::vmm_check_memory_usage();
// m_rtts storage is double buffered and should be safe to tag on frame boundary // m_rtts storage is double buffered and should be safe to tag on frame boundary

View File

@ -637,6 +637,12 @@ namespace vk
return dev; return dev;
} }
// Re-orders the device-local memory type list so that future allocations
// prefer the type with the most free space. Invoked once per frame from
// VKGSRender::advance_queued_frames().
void render_device::rebalance_memory_type_usage()
{
// Rebalance device local memory types. Only device_local can hold multiple
// types; the host-visible mapping keeps a single type and needs no balancing.
memory_map.device_local.rebalance();
}
// Shared Util // Shared Util
memory_type_mapping get_memory_mapping(const vk::physical_device& dev) memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
{ {
@ -657,7 +663,7 @@ namespace vk
if (is_device_local) if (is_device_local)
{ {
// Allow multiple device_local heaps // Allow multiple device_local heaps
result.device_local.push(i); result.device_local.push(i, heap.size);
result.device_local_total_bytes += heap.size; result.device_local_total_bytes += heap.size;
} }
@ -670,7 +676,7 @@ namespace vk
if ((is_cached && !host_visible_cached) || (result.host_visible_total_bytes < heap.size)) if ((is_cached && !host_visible_cached) || (result.host_visible_total_bytes < heap.size))
{ {
// Allow only a single host_visible heap. It makes no sense to have multiple of these otherwise // Allow only a single host_visible heap. It makes no sense to have multiple of these otherwise
result.host_visible_coherent = i; result.host_visible_coherent = { i, heap.size };
result.host_visible_total_bytes = heap.size; result.host_visible_total_bytes = heap.size;
host_visible_cached = is_cached; host_visible_cached = is_cached;
} }

View File

@ -120,6 +120,7 @@ namespace vk
const VkFormatProperties get_format_properties(VkFormat format); const VkFormatProperties get_format_properties(VkFormat format);
bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const; bool get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const;
void rebalance_memory_type_usage();
const physical_device& gpu() const; const physical_device& gpu() const;
const memory_type_mapping& get_memory_mapping() const; const memory_type_mapping& get_memory_mapping() const;

View File

@ -13,13 +13,14 @@ namespace
namespace vk namespace vk
{ {
memory_type_info::memory_type_info(u32 index) memory_type_info::memory_type_info(u32 index, u64 size)
{ {
push(index); push(index, size);
} }
void memory_type_info::push(u32 index) void memory_type_info::push(u32 index, u64 size)
{ {
type_ids.push_back(index); type_ids.push_back(index);
type_sizes.push_back(size);
} }
memory_type_info::const_iterator memory_type_info::begin() const memory_type_info::const_iterator memory_type_info::begin() const
@ -38,6 +39,11 @@ namespace vk
return type_ids.front(); return type_ids.front();
} }
// Returns the number of memory type indices tracked by this mapping
// (type_ids and type_sizes are kept in lockstep, so this is also the
// number of recorded heap sizes).
size_t memory_type_info::count() const
{
return type_ids.size();
}
memory_type_info::operator bool() const memory_type_info::operator bool() const
{ {
return !type_ids.empty(); return !type_ids.empty();
@ -46,11 +52,11 @@ namespace vk
memory_type_info memory_type_info::get(const render_device& dev, u32 access_flags, u32 type_mask) const memory_type_info memory_type_info::get(const render_device& dev, u32 access_flags, u32 type_mask) const
{ {
memory_type_info result{}; memory_type_info result{};
for (const auto& type : type_ids) for (size_t i = 0; i < type_ids.size(); ++i)
{ {
if (type_mask & (1 << type)) if (type_mask & (1 << type_ids[i]))
{ {
result.push(type); result.push(type_ids[i], type_sizes[i]);
} }
} }
@ -59,13 +65,71 @@ namespace vk
u32 type; u32 type;
if (dev.get_compatible_memory_type(type_mask, access_flags, &type)) if (dev.get_compatible_memory_type(type_mask, access_flags, &type))
{ {
result = type; result = { type, 0ull };
} }
} }
return result; return result;
} }
void memory_type_info::rebalance()
{
// Re-order indices with the least used one first.
// This will avoid constant pressure on the memory budget in low memory systems.
if (type_ids.size() <= 1)
{
// Nothing to do
return;
}
std::vector<std::pair<u32, u64>> free_memory_map;
const auto num_types = type_ids.size();
u64 last_free = UINT64_MAX;
bool to_reorder = false;
for (u32 i = 0; i < num_types; ++i)
{
const auto heap_size = type_sizes[i];
const auto type_id = type_ids[i];
ensure(heap_size > 0);
const u64 used_mem = vmm_get_application_memory_usage({ type_ids[i], 0ull });
const u64 free_mem = (used_mem >= heap_size) ? 0ull : (heap_size - used_mem);
to_reorder |= (free_mem > last_free);
last_free = free_mem;
free_memory_map.push_back({ i, free_mem });
}
if (!to_reorder) [[likely]]
{
return;
}
ensure(free_memory_map.size() == num_types);
std::sort(free_memory_map.begin(), free_memory_map.end(), [](const auto& a, const auto& b)
{
return a.second > b.second;
});
std::vector<u32> new_type_ids(num_types);
std::vector<u64> new_type_sizes(num_types);
for (u32 i = 0; i < num_types; ++i)
{
const u32 ref = free_memory_map[i].first;
new_type_ids[i] = type_ids[ref];
new_type_sizes[i] = type_sizes[ref];
}
type_ids = new_type_ids;
type_sizes = new_type_sizes;
rsx_log.warning("Rebalanced memory types successfully");
}
mem_allocator_vma::mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev) mem_allocator_vma::mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev)
{ {
// Initialize stats pool // Initialize stats pool

View File

@ -27,21 +27,25 @@ namespace vk
class memory_type_info class memory_type_info
{ {
std::vector<u32> type_ids; std::vector<u32> type_ids;
std::vector<u64> type_sizes;
public: public:
memory_type_info() = default; memory_type_info() = default;
memory_type_info(u32 index); memory_type_info(u32 index, u64 size);
void push(u32 index); void push(u32 index, u64 size);
using iterator = u32*; using iterator = u32*;
using const_iterator = const u32*; using const_iterator = const u32*;
const_iterator begin() const; const_iterator begin() const;
const_iterator end() const; const_iterator end() const;
u32 first() const; u32 first() const;
size_t count() const;
operator bool() const; operator bool() const;
memory_type_info get(const render_device& dev, u32 access_flags, u32 type_mask) const; memory_type_info get(const render_device& dev, u32 access_flags, u32 type_mask) const;
void rebalance();
}; };
class mem_allocator_base class mem_allocator_base