rsx/vk: Vertex cache rewritten, add option to disable it as well

- Also enable SPU loop detection by default while we're at it
This commit is contained in:
kd-11 2017-07-24 20:49:51 +03:00
parent 7fa42cfaad
commit 46fa6e47fe
6 changed files with 79 additions and 53 deletions

View File

@ -626,6 +626,11 @@ VKGSRender::VKGSRender() : GSRender()
m_text_writer.reset(new vk::text_writer());
m_text_writer->init(*m_device, m_memory_type_mapping, m_render_passes[idx]);
}
if (g_cfg.video.disable_vertex_cache)
m_vertex_cache.reset(new null_vertex_cache());
else
m_vertex_cache.reset(new vk::vertex_cache::weak_vertex_cache());
}
VKGSRender::~VKGSRender()
@ -809,7 +814,7 @@ void VKGSRender::begin()
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
flush_command_queue(true);
m_vertex_cache.purge();
m_vertex_cache->purge();
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_last_descriptor_set = VK_NULL_HANDLE;
@ -1264,6 +1269,8 @@ void VKGSRender::on_init_thread()
GSRender::on_init_thread();
rsx_thread = std::this_thread::get_id();
thread_ctrl::set_native_priority(1);
}
void VKGSRender::on_exit()
@ -1535,7 +1542,7 @@ void VKGSRender::process_swap_request()
m_text_writer->reset_descriptors();
}
m_vertex_cache.purge();
m_vertex_cache->purge();
m_swap_command_buffer = nullptr;
}

View File

@ -16,6 +16,9 @@
#pragma comment(lib, "VKstatic.1.lib")
using namespace vk::vertex_cache;
// The rsx::vertex_cache base type itself: its default find/store/purge methods do nothing,
// so an instance of it acts as the cache used when the vertex cache is disabled
using null_vertex_cache = rsx::vertex_cache<uploaded_range, VkFormat>;
//Heap allocation sizes in MB
#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 32
@ -90,49 +93,6 @@ struct command_buffer_chunk: public vk::command_buffer
}
};
struct weak_vertex_cache
{
struct uploaded_range
{
u32 offset_in_heap;
VkFormat buffer_format;
uintptr_t local_address;
u32 data_length;
};
private:
std::vector<uploaded_range> vertex_ranges;
public:
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length)
{
for (auto &v : vertex_ranges)
{
if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length)
return &v;
}
return nullptr;
}
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap)
{
uploaded_range v = {};
v.buffer_format = fmt;
v.data_length = data_length;
v.local_address = local_addr;
v.offset_in_heap = offset_in_heap;
vertex_ranges.push_back(v);
}
void purge()
{
vertex_ranges.resize(0);
}
};
class VKGSRender : public GSRender
{
private:
@ -157,7 +117,7 @@ private:
public:
//vk::fbo draw_fbo;
weak_vertex_cache m_vertex_cache;
std::unique_ptr<null_vertex_cache> m_vertex_cache;
private:
VKProgramBuffer m_prog_buffer;

View File

@ -17,6 +17,7 @@
#include "../GCM.h"
#include "../Common/TextureUtils.h"
#include "../Common/ring_buffer_helper.h"
#include "../rsx_cache.h"
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
@ -1456,6 +1457,54 @@ namespace vk
}
};
namespace vertex_cache
{
	// Describes one span of vertex data that has already been uploaded
	struct uploaded_range
	{
		uintptr_t local_address;
		VkFormat buffer_format;
		u32 offset_in_heap;
		u32 data_length;
	};

	// A weak vertex cache with no data checks or memory range locks
	// Of limited use since contents are only guaranteed to be valid once per frame
	// TODO: Strict vertex cache with range locks
	class weak_vertex_cache: public rsx::vertex_cache<uploaded_range, VkFormat>
	{
	private:
		// Recorded uploads, bucketed by the source address they were read from
		std::unordered_map<uintptr_t, std::vector<uploaded_range>> vertex_ranges;

	public:
		// Returns the recorded upload for local_addr with the given format and length,
		// or nullptr when none exists. The pointer refers to an element stored inside
		// the cache and may be invalidated by a later store_range() for the same
		// address or by purge().
		uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length) override
		{
			// Look up with find() instead of operator[]: operator[] would insert an
			// empty bucket for every address that misses, growing the map on pure lookups
			const auto found = vertex_ranges.find(local_addr);
			if (found == vertex_ranges.end())
				return nullptr;

			for (auto &v : found->second)
			{
				if (v.buffer_format == fmt && v.data_length == data_length)
					return &v;
			}

			return nullptr;
		}

		// Records that data_length bytes read from local_addr were uploaded with format fmt
		// at offset_in_heap. Duplicates are not detected.
		void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap) override
		{
			uploaded_range v = {};
			v.buffer_format = fmt;
			v.data_length = data_length;
			v.local_address = local_addr;
			v.offset_in_heap = offset_in_heap;

			// operator[] is intended here: a bucket is created on first store for an address
			vertex_ranges[local_addr].push_back(v);
		}

		// Drops every recorded upload
		void purge() override
		{
			vertex_ranges.clear();
		}
	};
}
/**
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
* Then copy all layers into dst_image.

View File

@ -252,10 +252,10 @@ namespace
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
vk::glsl::program* prog, VkDescriptorSet desc_set,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
weak_vertex_cache& vertex_cache)
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache)
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
vertex_cache(&vertex_cache)
vertex_cache(vertex_cache)
{
}
@ -341,7 +341,7 @@ namespace
vk::glsl::program* m_program;
VkDescriptorSet descriptor_sets;
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
weak_vertex_cache* vertex_cache;
rsx::vertex_cache<uploaded_range, VkFormat>* vertex_cache;
};
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
@ -470,7 +470,7 @@ namespace
const u32 vertex_count = vertex_max_index - min_index + 1;
vertex_buffer_visitor visitor(vertex_count, m_device,
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache);
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache.get());
const auto& vertex_buffers = get_vertex_buffers(
rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
@ -500,7 +500,7 @@ namespace
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
const uintptr_t local_addr = (uintptr_t)v.data.data();
const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size);
const auto cached = rsxthr->m_vertex_cache->find_vertex_range(local_addr, format, upload_size);
if (cached)
{
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size));
@ -520,7 +520,7 @@ namespace
upload_jobs.push_back(i);
const uintptr_t local_addr = (uintptr_t)v.data.data();
rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset);
rsxthr->m_vertex_cache->store_range(local_addr, format, upload_size, (u32)offset);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);

View File

@ -194,4 +194,13 @@ namespace rsx
return std::make_pair(min, max);
}
};
// Base interface for vertex upload caches.
// The default implementation caches nothing: lookups always miss, and stores and purges
// are no-ops. Derived classes override the three methods to provide real caching.
//   storage_type  - record type returned by lookups
//   upload_format - format key used together with the address and length to match a record
template <typename storage_type, typename upload_format>
class vertex_cache
{
public:
	// Instances of derived caches are owned and destroyed through pointers to this base,
	// so the destructor must be virtual for the derived destructor to run
	virtual ~vertex_cache() = default;

	// Returns the cached record matching the arguments, or nullptr on a miss (always, in the base)
	virtual storage_type* find_vertex_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/) { return nullptr; }

	// Records an upload (ignored in the base)
	virtual void store_range(uintptr_t /*local_addr*/, upload_format, u32 /*data_length*/, u32 /*offset_in_heap*/) {}

	// Discards all cached records (nothing to discard in the base)
	virtual void purge() {}
};
}

View File

@ -277,7 +277,7 @@ struct cfg_root : cfg::node
cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; //Number of hardware threads dedicated to heavy simultaneous spu tasks
cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
cfg::_bool spu_loop_detection{this, "SPU loop detection", false}; //Try to detect wait loops and trigger thread yield
cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield
cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::automatic};
cfg::_bool hook_functions{this, "Hook static functions"};
@ -326,6 +326,7 @@ struct cfg_root : cfg::node
cfg::_bool invalidate_surface_cache_every_frame{this, "Invalidate Cache Every Frame", true};
cfg::_bool strict_rendering_mode{this, "Strict Rendering Mode"};
cfg::_bool disable_vertex_cache{this, "Disable Vertex Cache", false};
cfg::_bool batch_instanced_geometry{this, "Batch Instanced Geometry", false}; //Avoid re-uploading geometry if the same draw command is repeated
cfg::_int<1, 16> vertex_upload_threads{ this, "Vertex Upload Threads", 1 }; //Max number of threads to use for parallel vertex processing
cfg::_int<32, 65536> mt_vertex_upload_threshold{ this, "Multithreaded Vertex Upload Threshold", 512}; //Minimum vertex count to parallelize