vulkan; Add a weak vertex cache with single frame validity

This commit is contained in:
kd-11 2017-07-22 16:45:15 +03:00
parent 8db6555bdb
commit 425821863b
3 changed files with 75 additions and 10 deletions

View File

@ -809,6 +809,7 @@ void VKGSRender::begin()
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
flush_command_queue(true);
m_vertex_cache.purge();
CHECK_RESULT(vkResetDescriptorPool(*m_device, descriptor_pool, 0));
m_last_descriptor_set = VK_NULL_HANDLE;
@ -1534,6 +1535,8 @@ void VKGSRender::process_swap_request()
m_text_writer->reset_descriptors();
}
m_vertex_cache.purge();
m_swap_command_buffer = nullptr;
}

View File

@ -90,6 +90,49 @@ struct command_buffer_chunk: public vk::command_buffer
}
};
struct weak_vertex_cache
{
struct uploaded_range
{
u32 offset_in_heap;
VkFormat buffer_format;
uintptr_t local_address;
u32 data_length;
};
private:
std::vector<uploaded_range> vertex_ranges;
public:
uploaded_range* find_vertex_range(uintptr_t local_addr, VkFormat fmt, u32 data_length)
{
for (auto &v : vertex_ranges)
{
if (v.local_address == local_addr && v.buffer_format == fmt && v.data_length == data_length)
return &v;
}
return nullptr;
}
void store_range(uintptr_t local_addr, VkFormat fmt, u32 data_length, u32 offset_in_heap)
{
uploaded_range v = {};
v.buffer_format = fmt;
v.data_length = data_length;
v.local_address = local_addr;
v.offset_in_heap = offset_in_heap;
vertex_ranges.push_back(v);
}
void purge()
{
vertex_ranges.resize(0);
}
};
class VKGSRender : public GSRender
{
private:
@ -114,6 +157,7 @@ private:
public:
//vk::fbo draw_fbo;
weak_vertex_cache m_vertex_cache;
private:
VKProgramBuffer m_prog_buffer;

View File

@ -251,9 +251,11 @@ namespace
{
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
vk::glsl::program* prog, VkDescriptorSet desc_set,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean)
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
weak_vertex_cache& vertex_cache)
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean)
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean),
vertex_cache(&vertex_cache)
{
}
@ -281,6 +283,9 @@ namespace
m_attrib_ring_info.unmap();
const VkFormat format = vk::get_suitable_vk_format(vertex_array.type, vertex_array.attribute_size);
const uintptr_t local_addr = (uintptr_t)vertex_array.data.data();
vertex_cache->store_range(local_addr, format, upload_size, (u32)offset_in_attrib_buffer);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vertex_array.index], descriptor_sets);
}
@ -336,6 +341,7 @@ namespace
vk::glsl::program* m_program;
VkDescriptorSet descriptor_sets;
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
weak_vertex_cache* vertex_cache;
};
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
@ -464,7 +470,7 @@ namespace
const u32 vertex_count = vertex_max_index - min_index + 1;
vertex_buffer_visitor visitor(vertex_count, m_device,
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean);
m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean, rsxthr->m_vertex_cache);
const auto& vertex_buffers = get_vertex_buffers(
rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
@ -483,26 +489,38 @@ namespace
const auto &vbo = vertex_buffers[i];
bool can_multithread = false;
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
if (vbo.which() == 0)
{
//vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>();
const u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
const u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
const u32 upload_size = real_element_size * vertex_count;
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
const uintptr_t local_addr = (uintptr_t)v.data.data();
const auto cached = rsxthr->m_vertex_cache.find_vertex_range(local_addr, format, upload_size);
if (cached)
{
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, cached->offset_in_heap, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
continue;
}
if (v.attribute_size > 1)
if (v.attribute_size > 1 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
{
can_multithread = true;
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
u32 upload_size = real_element_size * vertex_count;
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
memory_allocations.push_back(offset);
allocated_sizes.push_back(upload_size);
upload_jobs.push_back(i);
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
const uintptr_t local_addr = (uintptr_t)v.data.data();
rsxthr->m_vertex_cache.store_range(local_addr, format, upload_size, (u32)offset);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);