vk: Implement heap aggregation

- Aggregate heaps of the same type under one object and allow the allocator to try each matching type in turn.
2021-07-20 18:05:12 +03:00 · 2021-07-20 18:05:12 +03:00 · 59e7379010
parent be9be6e5c9
commit 59e7379010
5 changed files with 143 additions and 52 deletions
--- a/rpcs3/Emu/RSX/VK/VKResourceManager.cpp
+++ b/rpcs3/Emu/RSX/VK/VKResourceManager.cpp
@ -109,9 +109,21 @@ namespace vk
 		g_last_completed_event = 0;
 	}

-	u64 vmm_get_application_memory_usage(u32 memory_type)
+	// Returns the combined usage recorded in g_vmm_stats.memory_usage for every
+	// memory type index contained in memory_type.
+	// Indices that have no entry in the stats table contribute nothing to the sum.
+	u64 vmm_get_application_memory_usage(const memory_type_info& memory_type)
 	{
-		return g_vmm_stats.memory_usage[memory_type];
+		u64 result = 0;
+		for (const auto& memory_type_index : memory_type)
+		{
+			// Look the index up with find() and skip it when nothing has been recorded for it
+			auto it = g_vmm_stats.memory_usage.find(memory_type_index);
+			if (it == g_vmm_stats.memory_usage.end())
+			{
+				continue;
+			}
+
+			// NOTE(review): observe() presumably reads the current counter value - confirm against the stats type
+			result += it->second.observe();
+		}
+
+		return result;
 	}

 	u64 vmm_get_application_pool_usage(vmm_allocation_pool pool)
--- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
@ -645,8 +645,6 @@ namespace vk
 		vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);

 		memory_type_mapping result;
-		result.device_local = VK_MAX_MEMORY_TYPES;
-		result.host_visible_coherent = VK_MAX_MEMORY_TYPES;
 		result.device_local_total_bytes = 0;
 		result.host_visible_total_bytes = 0;
 		bool host_visible_cached = false;
@ -658,11 +656,9 @@ namespace vk
 			bool is_device_local = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
 			if (is_device_local)
 			{
-				if (result.device_local_total_bytes < heap.size)
-				{
-					result.device_local = i;
-					result.device_local_total_bytes = heap.size;
-				}
+				// Allow multiple device_local heaps
+				result.device_local.push(i);
+				result.device_local_total_bytes += heap.size;
 			}

 			bool is_host_visible = !!(memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
@ -673,6 +669,7 @@ namespace vk
 			{
 				if ((is_cached && !host_visible_cached) || (result.host_visible_total_bytes < heap.size))
 				{
+					// Allow only a single host_visible heap. It makes no sense to have multiple of these otherwise
 					result.host_visible_coherent = i;
 					result.host_visible_total_bytes = heap.size;
 					host_visible_cached = is_cached;
@ -680,9 +677,9 @@ namespace vk
 			}
 		}

-		if (result.device_local == VK_MAX_MEMORY_TYPES)
+		if (!result.device_local)
 			fmt::throw_exception("GPU doesn't support device local memory");
-		if (result.host_visible_coherent == VK_MAX_MEMORY_TYPES)
+		if (!result.host_visible_coherent)
 			fmt::throw_exception("GPU doesn't support host coherent device local memory");
 		return result;
 	}
--- a/rpcs3/Emu/RSX/VK/vkutils/device.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.h
@ -28,8 +28,8 @@ namespace vk

 	struct memory_type_mapping
 	{
-		u32 host_visible_coherent;
-		u32 device_local;
+		memory_type_info host_visible_coherent;
+		memory_type_info device_local;

 		u64 device_local_total_bytes;
 		u64 host_visible_total_bytes;
--- a/rpcs3/Emu/RSX/VK/vkutils/memory.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/memory.cpp
@ -13,6 +13,32 @@ namespace

 namespace vk
 {
+	// Constructs an object that holds exactly one memory type index.
+	// Starts from an empty list and stores index through push().
+	memory_type_info::memory_type_info(u32 index)
+		: num_entries(0)
+	{
+		push(index);
+	}
+	// Appends index after the entries stored so far.
+	// The ensure() call checks that the fixed-size pools array still has a free slot
+	// (NOTE(review): ensure() presumably raises a fatal error when the check fails - confirm).
+	void memory_type_info::push(u32 index)
+	{
+		ensure(num_entries < pools.size());
+		pools[num_entries++] = index;
+	}
+
+	// Returns a pointer to the first stored memory type index (start of the pools array).
+	memory_type_info::const_iterator memory_type_info::begin() const
+	{
+		return pools.data();
+	}
+
+	// Returns a pointer one past the last stored index, so iteration covers
+	// only the num_entries values that were pushed, not the whole array.
+	memory_type_info::const_iterator memory_type_info::end() const
+	{
+		return pools.data() + num_entries;
+	}
+
+	// True when at least one memory type index has been stored.
+	memory_type_info::operator bool() const
+	{
+		return (num_entries > 0);
+	}
+
 	mem_allocator_vma::mem_allocator_vma(VkDevice dev, VkPhysicalDevice pdev) : mem_allocator_base(dev, pdev)
 	{
 		// Initialize stats pool
@ -33,40 +59,58 @@ namespace vk
 		vmaDestroyAllocator(m_allocator);
 	}

-	mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool)
+	// Allocates a block through VMA, trying every memory type index held by memory_type in order.
+	//
+	// block_sz    - requested size; the size handed to VMA is ::align2(block_sz, alignment)
+	// alignment   - alignment placed in the memory requirements
+	// memory_type - candidate memory type indices; the first one that succeeds is used
+	// pool        - forwarded to vmm_notify_memory_allocated() together with the index that succeeded
+	//
+	// If the first pass fails and the last attempt reported VK_ERROR_OUT_OF_DEVICE_MEMORY,
+	// vmm_handle_memory_pressure() is invoked and, when it returns true, the whole pass is repeated once.
+	// Any remaining failure is handed to die_with_error().
+	mem_allocator_vk::mem_handle_t mem_allocator_vma::alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool)
 	{
 		VmaAllocation vma_alloc;
 		VkMemoryRequirements mem_req = {};
 		VmaAllocationCreateInfo create_info = {};
+		// Must start with a defined value: when memory_type holds no entries the loop below
+		// never assigns error_code, yet it is still returned and inspected afterwards.
+		VkResult error_code = VK_ERROR_UNKNOWN;

-		mem_req.memoryTypeBits = 1u << memory_type_index;
-		mem_req.size = ::align2(block_sz, alignment);
-		mem_req.alignment = alignment;
-		create_info.memoryTypeBits = 1u << memory_type_index;
-		create_info.flags = m_allocation_flags;
-
-		if (VkResult result = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
-			result != VK_SUCCESS)
+		// One pass over all candidate types. Returns the result code plus the index that succeeded,
+		// or the last recorded error code and ~0u when no candidate worked.
+		auto do_vma_alloc = [&]() -> std::tuple<VkResult, u32>
 		{
-			if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
-				vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+			for (const auto& memory_type_index : memory_type)
 			{
-				// If we just ran out of VRAM, attempt to release resources and try again
-				result = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
+				// Restrict both the requirements and the create info to this single memory type
+				mem_req.memoryTypeBits = 1u << memory_type_index;
+				mem_req.size = ::align2(block_sz, alignment);
+				mem_req.alignment = alignment;
+				create_info.memoryTypeBits = 1u << memory_type_index;
+				create_info.flags = m_allocation_flags;
+
+				error_code = vmaAllocateMemory(m_allocator, &mem_req, &create_info, &vma_alloc, nullptr);
+				if (error_code == VK_SUCCESS)
+				{
+					return { VK_SUCCESS, memory_type_index };
+				}
 			}

-			if (result != VK_SUCCESS)
+			return { error_code, ~0u };
+		};
+
+		// On successful allocation, simply tag the transaction and carry on.
+		{
+			const auto [status, type] = do_vma_alloc();
+			if (status == VK_SUCCESS)
 			{
-				die_with_error(result);
-			}
-			else
-			{
-				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool);
+				return vma_alloc;
 			}
 		}

-		vmm_notify_memory_allocated(vma_alloc, memory_type_index, block_sz, pool);
-		return vma_alloc;
+		if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
+			vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+		{
+			// Out of memory. Try again.
+			const auto [status, type] = do_vma_alloc();
+			if (status == VK_SUCCESS)
+			{
+				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(vma_alloc, type, block_sz, pool);
+				return vma_alloc;
+			}
+		}
+
+		// NOTE(review): die_with_error() is presumably not expected to return; the throw below is the fallback if it does
+		die_with_error(error_code);
+		fmt::throw_exception("Unreachable! Error_code=0x%x", static_cast<u32>(error_code));
 	}

 	void mem_allocator_vma::free(mem_handle_t mem_handle)
@ -136,34 +180,54 @@ namespace vk
 		m_allocation_flags = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT;
 	}

-	mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index, vmm_allocation_pool pool)
+	// Allocates block_sz bytes with vkAllocateMemory, trying every memory type index held by memory_type in order.
+	//
+	// block_sz    - allocation size passed to Vulkan unchanged (the alignment parameter is unused here)
+	// memory_type - candidate memory type indices; the first one that succeeds is used
+	// pool        - forwarded to vmm_notify_memory_allocated() together with the index that succeeded
+	//
+	// If the first pass fails and the last attempt reported VK_ERROR_OUT_OF_DEVICE_MEMORY,
+	// vmm_handle_memory_pressure() is invoked and, when it returns true, the whole pass is repeated once.
+	// Any remaining failure is handed to die_with_error().
+	mem_allocator_vk::mem_handle_t mem_allocator_vk::alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool)
 	{
+		// Must start with a defined value: when memory_type holds no entries the loop below
+		// never assigns error_code, yet it is still returned and inspected afterwards.
+		VkResult error_code = VK_ERROR_UNKNOWN;
 		VkDeviceMemory memory;
+
 		VkMemoryAllocateInfo info = {};
 		info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
 		info.allocationSize = block_sz;
-		info.memoryTypeIndex = memory_type_index;

-		if (VkResult result = vkAllocateMemory(m_device, &info, nullptr, &memory); result != VK_SUCCESS)
+		// One pass over all candidate types. Returns the result code plus the index that succeeded,
+		// or the last recorded error code and ~0u when no candidate worked.
+		auto do_vk_alloc = [&]() -> std::tuple<VkResult, u32>
 		{
-			if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY && vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+			for (const auto& memory_type_index : memory_type)
 			{
-				// If we just ran out of VRAM, attempt to release resources and try again
-				result = vkAllocateMemory(m_device, &info, nullptr, &memory);
+				info.memoryTypeIndex = memory_type_index;
+				error_code = vkAllocateMemory(m_device, &info, nullptr, &memory);
+				if (error_code == VK_SUCCESS)
+				{
+					return { error_code, memory_type_index };
+				}
 			}

-			if (result != VK_SUCCESS)
+			return { error_code, ~0u };
+		};
+
+		// First pass: on success just record the allocation and return it
+		{
+			const auto [status, type] = do_vk_alloc();
+			if (status == VK_SUCCESS)
 			{
-				die_with_error(result);
-			}
-			else
-			{
-				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(memory, type, block_sz, pool);
+				return memory;
 			}
 		}

-		vmm_notify_memory_allocated(memory, memory_type_index, block_sz, pool);
-		return memory;
+		if (error_code == VK_ERROR_OUT_OF_DEVICE_MEMORY &&
+			vmm_handle_memory_pressure(rsx::problem_severity::fatal))
+		{
+			// Out of memory. Try again.
+			const auto [status, type] = do_vk_alloc();
+			if (status == VK_SUCCESS)
+			{
+				rsx_log.warning("Renderer ran out of video memory but successfully recovered.");
+				vmm_notify_memory_allocated(memory, type, block_sz, pool);
+				return memory;
+			}
+		}
+
+		// NOTE(review): die_with_error() is presumably not expected to return; the throw below is the fallback if it does
+		die_with_error(error_code);
+		fmt::throw_exception("Unreachable! Error_code=0x%x", static_cast<u32>(error_code));
 	}

 	void mem_allocator_vk::free(mem_handle_t mem_handle)
--- a/rpcs3/Emu/RSX/VK/vkutils/memory.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/memory.h
@ -23,6 +23,24 @@ namespace vk

 	using namespace vk::vmm_allocation_pool_;

+	// Small fixed-capacity list of memory type indices (at most 4 entries).
+	// Supports range-for iteration over the stored indices and a boolean test for "non-empty".
+	class memory_type_info
+	{
+		// Backing storage; only the first num_entries elements hold stored indices
+		std::array<u32, 4> pools;
+		// Number of indices stored so far
+		u32 num_entries = 0;
+
+	public:
+		// Creates an empty list
+		memory_type_info() = default;
+		// Creates a list holding the single given index. Not explicit, so a plain u32 converts implicitly.
+		memory_type_info(u32 index);
+		// Appends an index to the list
+		void push(u32 index);
+
+		using iterator = u32*;
+		using const_iterator = const u32*;
+		// Iteration range over the stored indices
+		const_iterator begin() const;
+		const_iterator end() const;
+
+		// True when at least one index is stored.
+		// NOTE(review): not explicit, so the object also converts implicitly in arithmetic and comparisons - confirm this is intended
+		operator bool() const;
+	};
+
 	class mem_allocator_base
 	{
 	public:
@ -33,7 +51,7 @@ namespace vk

 		virtual void destroy() = 0;

-		virtual mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool) = 0;
+		virtual mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool) = 0;
 		virtual void free(mem_handle_t mem_handle) = 0;
 		virtual void* map(mem_handle_t mem_handle, u64 offset, u64 size) = 0;
 		virtual void unmap(mem_handle_t mem_handle) = 0;
@ -61,7 +79,7 @@ namespace vk

 		void destroy() override;

-		mem_handle_t alloc(u64 block_sz, u64 alignment, u32 memory_type_index, vmm_allocation_pool pool) override;
+		mem_handle_t alloc(u64 block_sz, u64 alignment, const memory_type_info& memory_type, vmm_allocation_pool pool) override;

 		void free(mem_handle_t mem_handle) override;
 		void* map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override;
@ -90,7 +108,7 @@ namespace vk

 		void destroy() override {}

-		mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, u32 memory_type_index, vmm_allocation_pool pool) override;
+		mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, const memory_type_info& memory_type, vmm_allocation_pool pool) override;

 		void free(mem_handle_t mem_handle) override;
 		void* map(mem_handle_t mem_handle, u64 offset, u64 size) override;
@ -151,7 +169,7 @@ namespace vk
 	void vmm_notify_memory_freed(void* handle);
 	void vmm_reset();
 	void vmm_check_memory_usage();
-	u64  vmm_get_application_memory_usage(u32 memory_type);
+	u64  vmm_get_application_memory_usage(const memory_type_info& memory_type);
 	u64  vmm_get_application_pool_usage(vmm_allocation_pool pool);
 	bool vmm_handle_memory_pressure(rsx::problem_severity severity);
 	rsx::problem_severity vmm_determine_memory_load_severity();