rsx/vk: Vertex cache rewritten, add option to disable it as well

- Also enable SPU loop detection by default while we're at it
2017-07-24 20:49:51 +03:00 · 2017-07-24 20:49:51 +03:00 · 46fa6e47fe
parent 7fa42cfaad
commit 46fa6e47fe
6 changed files with 79 additions and 53 deletions
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@ -626,6 +626,11 @@ VKGSRender::VKGSRender() : GSRender()
 		m_text_writer.reset(new vk::text_writer());
 		m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]);
 	}
+
+	if (g_cfg.video.disable_vertex_cache)
+		m_vertex_cache.reset(new null_vertex_cache());
+	else
+		m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
 }

 VKGSRender::~VKGSRender()
@ -809,7 +814,7 @@ void VKGSRender::begin()
 		std::chrono::time_point<steady_clock> submit_start = steady_clock::now();

 		flush_command_queue(true);
-		m_vertex_cache.purge();
+		m_vertex_cache->purge();

 		CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
 		m_last_descriptor_set = VK_NULL_HANDLE;
@ -1264,6 +1269,8 @@ void VKGSRender::on_init_thread()

 	GSRender::on_init_thread();
 	rsx_thread = std::this_thread::get_id();
+
+	thread_ctrl::set_native_priority(1);
 }

 void VKGSRender::on_exit()
@ -1535,7 +1542,7 @@ void VKGSRender::process_swap_request()
 		m_text_writer->reset_descriptors();
 	}

-	m_vertex_cache.purge();
+	m_vertex_cache->purge();

 	m_swap_command_buffer = nullptr;
 }
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@ -16,6 +16,9 @@

 #pragma comment(lib, "VKstatic.1.lib")

+using namespace vk::vertex_cache;
+using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
+
 //Heap allocation sizes in MB
 #define VK_ATTRIB_RING_BUFFER_SIZE_M 256
 #define VK_UBO_RING_BUFFER_SIZE_M 32
@ -90,49 +93,6 @@ struct command_buffer_chunk: public vk::command_buffer
 	}
 };

-struct weak_vertex_cache
-{
-	struct uploaded_range
-	{
-		u32 offset_in_heap;
-
-		VkFormat buffer_format;
-		uintptr_t local_address;
-		u32 data_length;
-	};
-
-private:
-	std::vector<uploaded_range> vertex_ranges;
-public:
-
-	uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length)
-	{
-		for (auto &v : vertex_ranges)
-		{
-			if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length)
-				return &v;
-		}
-
-		return nullptr;
-	}
-
-	void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap)
-	{
-		uploaded_range v = {};
-		v.buffer_format = fmt;
-		v.data_length = data_length;
-		v.local_address = local_addr;
-		v.offset_in_heap = offset_in_heap;
-
-		vertex_ranges.push_back(v);
-	}
-
-	void purge()
-	{
-		vertex_ranges.resize(0);
-	}
-};
-
 class VKGSRender : public GSRender
 {
 private:
@ -157,7 +117,7 @@ private:

 public:
 	//vk::fbo draw_fbo;
-	weak_vertex_cache m_vertex_cache;
+	std::unique_ptr<null_vertex_cache> m_vertex_cache;

 private:
 	VKProgramBuffer m_prog_buffer;
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@ -17,6 +17,7 @@
 #include "../GCM.h"
 #include "../Common/TextureUtils.h"
 #include "../Common/ring_buffer_helper.h"
+#include "../rsx_cache.h"

 #define DESCRIPTOR_MAX_DRAW_CALLS 4096

@ -1456,6 +1457,54 @@ namespace vk
 		}
 	};

+	namespace vertex_cache
+	{
+		// Describes one vertex range that has already been uploaded to a heap
+		struct uploaded_range
+		{
+			uintptr_t local_address; // Source address of the data; doubles as the cache lookup key
+			VkFormat buffer_format;  // Format the data was uploaded with
+			u32 offset_in_heap;      // Where the uploaded copy lives inside the heap
+			u32 data_length;         // Length of the uploaded data
+		};
+
+		// A weak vertex cache with no data checks or memory range locks
+		// Of limited use since contents are only guaranteed to be valid once per frame
+		// TODO: Strict vertex cache with range locks
+		class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
+		{
+		private:
+			// Uploaded ranges bucketed by source address; one address may be cached
+			// several times with different formats and/or lengths
+			std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;
+		public:
+
+			// Returns the cached range matching address, format and length exactly,
+			// or nullptr when there is no such entry.
+			// The returned pointer refers to storage owned by this cache and is
+			// invalidated by purge() and possibly by a later store_range() call.
+			uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
+			{
+				// Use find() rather than operator[] so that a miss does not
+				// default-insert an empty bucket into the map
+				const auto found = vertex_ranges.find(local_addr);
+				if (found == vertex_ranges.end())
+					return nullptr;
+
+				for (auto &v : found->second)
+				{
+					if (v.buffer_format == fmt && v.data_length == data_length)
+						return &v;
+				}
+
+				return nullptr;
+			}
+
+			// Records that data_length units from local_addr were uploaded with format fmt
+			// at offset_in_heap. No check is made for an already existing identical entry.
+			void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
+			{
+				uploaded_range v = {};
+				v.buffer_format = fmt;
+				v.data_length = data_length;
+				v.local_address = local_addr;
+				v.offset_in_heap = offset_in_heap;
+
+				vertex_ranges[local_addr].push_back(v);
+			}
+
+			// Drops every cached range
+			void purge() override
+			{
+				vertex_ranges.clear();
+			}
+		};
+	}
+
 	/**
 	* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
 	* Then copy all layers into dst_image.
--- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp
@ -252,10 +252,10 @@ namespace
 		vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
 			vk::glsl::program* prog, VkDescriptorSet desc_set,
 			std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
-			weak_vertex_cache& vertex_cache)
+			rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
 			: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
 			  descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
-			  vertex_cache(&vertex_cache)
+			  vertex_cache(vertex_cache)
 		{
 		}

@ -341,7 +341,7 @@ namespace
 		vk::glsl::program* m_program;
 		VkDescriptorSet descriptor_sets;
 		std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
-		weak_vertex_cache* vertex_cache;
+		rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
 	};

 	using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
@ -470,7 +470,7 @@ namespace
 			const u32 vertex_count = vertex_max_index - min_index + 1;

 			vertex_buffer_visitor visitor(vertex_count, m_device,
-				m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache);
+				m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get());

 			const auto& vertex_buffers = get_vertex_buffers(
 				rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
@ -500,7 +500,7 @@ namespace
 					const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
 					const uintptr_t local_addr = (uintptr_t)v.data.data();

-					const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size);
+					const auto cached = rsxthr->m_vertex_cache->find_vertex_range(local_addr, format, upload_size);
 					if (cached)
 					{
 						m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size));
@ -520,7 +520,7 @@ namespace
 						upload_jobs.push_back(i);

 						const uintptr_t local_addr = (uintptr_t)v.data.data();
-						rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset);
+						rsxthr->m_vertex_cache->store_range(local_addr, format, upload_size, (u32)offset);

 						m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
 						m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
--- a/rpcs3/Emu/RSX/rsx_cache.h
+++ b/rpcs3/Emu/RSX/rsx_cache.h
@ -194,4 +194,13 @@ namespace rsx
 			return std::make_pair(min, max);
 		}
 	};
+
+	// Polymorphic vertex cache interface.
+	// The default implementation caches nothing: lookups always miss, while
+	// store_range() and purge() do nothing. Backends derive from it to provide real caching.
+	//   storage_type  - record type returned for a cached range
+	//   upload_format - backend-specific format identifier used as part of the lookup key
+	template <typename storage_type, typename upload_format>
+	class vertex_cache
+	{
+	public:
+		// Instances are owned and destroyed through base-class pointers, so the
+		// destructor must be virtual for derived caches to be destroyed correctly
+		virtual ~vertex_cache() = default;
+
+		// Returns the cached range matching the arguments, or nullptr on a miss (always a miss here)
+		virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr;  }
+		// Records an uploaded range (ignored here)
+		virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}
+		// Discards all cached ranges (nothing to discard here)
+		virtual void purge() {}
+	};
 }
--- a/rpcs3/Emu/System.h
+++ b/rpcs3/Emu/System.h
@ -277,7 +277,7 @@ struct cfg_root : cfg::node
 		cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
 		cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; //Numnber of hardware threads dedicated to heavy simultaneous spu tasks
 		cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
-		cfg::_bool spu_loop_detection{this, "SPU loop detection", false}; //Try to detect wait loops and trigger thread yield
+		cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield

 		cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::automatic};
 		cfg::_bool hook_functions{this, "Hook static functions"};
@ -326,6 +326,7 @@ struct cfg_root : cfg::node
 		cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true};
 		cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"};

+		cfg::_bool disable_vertex_cache{this, "Disable Vertex Cache", false};
 		cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated
 		cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing
 		cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 512}; //Minimum vertex count to parallelize