vulkan: Add a weak vertex cache with single-frame validity

2017-07-22 16:45:15 +03:00 · 2017-07-22 16:45:15 +03:00 · 425821863b
parent 8db6555bdb
commit 425821863b
3 changed files with 75 additions and 10 deletions
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -809,6 +809,7 @@ void VKGSRender::begin()
 		std::chrono::time_point<steady_clock> submit_start = steady_clock::now();

 		flush_command_queue(true);
+		m_vertex_cache.purge();

 		CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
 		m_last_descriptor_set = VK_NULL_HANDLE;
@ -1534,6 +1535,8 @@ void VKGSRender::process_swap_request()
 		m_text_writer->reset_descriptors();
 	}

+	m_vertex_cache.purge();
+
 	m_swap_command_buffer = nullptr;
 }

--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@ -90,6 +90,49 @@ struct command_buffer_chunk: public vk::command_buffer
 	}
 };

+struct weak_vertex_cache // Records vertex uploads keyed by (source address, format, length) so an identical upload can be found again and its heap offset reused; entries live until purge() is called.
+{
+	struct uploaded_range // One recorded upload: the three lookup keys plus the offset that was stored with them.
+	{
+		u32 offset_in_heap; // Offset handed to store_range(); returned to the caller on a cache hit.

+		VkFormat buffer_format; // Lookup key: format the data was uploaded with.
+		uintptr_t local_address; // Lookup key: address of the source data, compared as an integer and never dereferenced here.
+		u32 data_length; // Lookup key: upload size exactly as passed by the caller.
+	};

+private:
+	std::vector<uploaded_range> vertex_ranges; // Every entry stored since the last purge(), in insertion order.
+public:

+	uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) // Returns the first entry whose address, format and length all match exactly, or nullptr if none does.
+	{
+		for (auto &v : vertex_ranges) // Linear scan; the earliest stored match wins.
+		{
+			if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length) // All three keys must be equal; no partial or overlapping-range matching.
+				return &v; // Points into vertex_ranges, so a later store_range() or purge() may invalidate it.
+		}

+		return nullptr; // Cache miss.
+	}

+	void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) // Appends a new entry; does not check whether an identical key is already stored.
+	{
+		uploaded_range v = {}; // Value-initialise so no field is left indeterminate.
+		v.buffer_format = fmt;
+		v.data_length = data_length;
+		v.local_address = local_addr;
+		v.offset_in_heap = offset_in_heap;

+		vertex_ranges.push_back(v); // May reallocate, invalidating pointers previously returned by find_vertex_range().
+	}

+	void purge() // Discards every stored entry, so all subsequent lookups miss until new ranges are stored.
+	{
+		vertex_ranges.resize(0); // Removes all elements.
+	}
+};
+
 class VKGSRender : public GSRender
 {
 private:
@ -114,6 +157,7 @@ private:

 public:
 	//vk::fbo draw_fbo;
+	weak_vertex_cache m_vertex_cache;

 private:
 	VKProgramBuffer m_prog_buffer;
--- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
@ -251,9 +251,11 @@ namespace
 	{
 		vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
 			vk::glsl::program* prog, VkDescriptorSet desc_set,
-			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean)
+			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
+			weak_vertex_cache& vertex_cache)
 			: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
-			  descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean)
+			  descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
+			  vertex_cache(&vertex_cache)
 		{
 		}

@ -281,6 +283,9 @@ namespace
 			m_attrib_ring_info.unmap();
 			const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size);

+			const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
+			vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer);
+
 			m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
 			m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
 		}
@ -336,6 +341,7 @@ namespace
 		vk::glsl::program* m_program;
 		VkDescriptorSet descriptor_sets;
 		std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
+		weak_vertex_cache* vertex_cache;
 	};

 	using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
@ -464,7 +470,7 @@ namespace
 			const u32 vertex_count = vertex_max_index - min_index + 1;

 			vertex_buffer_visitor visitor(vertex_count, m_device,
-				m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean);
+				m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache);

 			const auto& vertex_buffers = get_vertex_buffers(
 				rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
@ -483,26 +489,38 @@ namespace
 				const auto &vbo = vertex_buffers[i];
 				bool can_multithread = false;

-				if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
+				if (vbo.which() == 0)
 				{
 					//vertex array buffer. We can thread this thing heavily
 					const auto& v = vbo.get<rsx::vertex_array_buffer>();
+
+					const u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
+					const u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
+					const u32 upload_size = real_element_size * vertex_count;
+					const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
+					const uintptr_t local_addr = (uintptr_t)v.data.data();
+
+					const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size);
+					if (cached)
+					{
+						m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size));
+						m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
+
+						continue;
+					}
 					
-					if (v.attribute_size > 1)
+					if (v.attribute_size > 1 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
 					{
 						can_multithread = true;
 					
-						u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
-						u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
-
-						u32 upload_size = real_element_size * vertex_count;
 						size_t offset = m_attrib_ring_info.alloc<256>(upload_size);

 						memory_allocations.push_back(offset);
 						allocated_sizes.push_back(upload_size);
 						upload_jobs.push_back(i);

-						const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
+						const uintptr_t local_addr = (uintptr_t)v.data.data();
+						rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset);

 						m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
 						m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);