Vulkan hotfix (#3046)

* Rework vertex attribute binding for Vulkan. A buffer view is now always provided to the pipeline for any attribute the vertex shader consumes, even if the game has that attribute disabled.
This commit is contained in:
kd-11 2017-07-22 01:54:28 +03:00 committed by GitHub
parent 9747ab61f9
commit df8fa74e2a
9 changed files with 77 additions and 44 deletions

View File

@ -468,7 +468,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma
return std::apply_visitor(
draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(),
[this](
const auto& state, const auto& list) { return get_vertex_buffers(state, list); }),
const auto& state, const auto& list) { return get_vertex_buffers(state, list, 0); }),
get_draw_command(rsx::method_registers));
}

View File

@ -454,7 +454,7 @@ std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_b
auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer,
m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment,
[this](const auto& state, const auto& list) {
return this->get_vertex_buffers(state, list);
return this->get_vertex_buffers(state, list, 0);
}),
get_draw_command(rsx::method_registers));

View File

@ -748,7 +748,7 @@ namespace rsx
}
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges, const u64 consumed_attrib_mask) const
{
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
result.reserve(rsx::limits::vertex_count);
@ -756,8 +756,10 @@ namespace rsx
u32 input_mask = state.vertex_attrib_input_mask();
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
const bool enabled = !!(input_mask & (1 << index));
const bool consumed = !!(consumed_attrib_mask & (1ull << index));
if (!enabled && !consumed)
continue;
if (state.vertex_arrays_info[index].size() > 0)

View File

@ -213,7 +213,7 @@ namespace rsx
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges, const u64 consumed_attrib_mask) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;

View File

@ -102,8 +102,7 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
{
int location = 0;
int location = TEXTURES_FIRST_BIND_SLOT;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{
if (PT.type != "sampler1D" &&
@ -142,7 +141,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
inputs.push_back(in);
OS << "layout(set=0, binding=" << 19 + location++ << ") uniform " << samplerType << " " << PI.name << ";\n";
OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n";
}
}
@ -169,7 +168,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << "};\n";
vk::glsl::program_input in;
in.location = 1;
in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
in.domain = vk::glsl::glsl_fragment_program;
in.name = "FragmentConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;

View File

@ -1420,6 +1420,7 @@ namespace vk
public:
VkPipeline pipeline;
u64 attribute_location_mask;
u64 vertex_attributes_mask;
program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs);
program(const program&) = delete;
@ -1432,6 +1433,8 @@ namespace vk
void bind_uniform(VkDescriptorImageInfo image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set);
void bind_uniform(VkDescriptorBufferInfo buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set);
u64 get_vertex_input_attributes_mask();
};
}

View File

@ -11,6 +11,7 @@ namespace vk
load_uniforms(glsl::program_domain::glsl_vertex_program, vertex_input);
load_uniforms(glsl::program_domain::glsl_vertex_program, fragment_inputs);
attribute_location_mask = 0;
vertex_attributes_mask = 0;
}
program::~program()
@ -58,10 +59,10 @@ namespace vk
descriptor_writer.pImageInfo = &image_descriptor;
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptor_writer.dstArrayElement = 0;
descriptor_writer.dstBinding = uniform.location + TEXTURES_FIRST_BIND_SLOT;
descriptor_writer.dstBinding = uniform.location;
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << (uniform.location + TEXTURES_FIRST_BIND_SLOT));
attribute_location_mask |= (1ull << uniform.location);
return;
}
}
@ -97,15 +98,32 @@ namespace vk
descriptor_writer.pTexelBufferView = &buffer_view;
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
descriptor_writer.dstArrayElement = 0;
descriptor_writer.dstBinding = uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT;
descriptor_writer.dstBinding = uniform.location;
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << (uniform.location + VERTEX_BUFFERS_FIRST_BIND_SLOT));
attribute_location_mask |= (1ull << uniform.location);
return;
}
}
LOG_NOTICE(RSX, "vertex buffer not found in program: %s", binding_name.c_str());
}
// Returns a bitmask describing which vertex attribute buffers this program
// declares. Every uniform in the vertex-program domain whose type is a texel
// buffer sets bit (location - VERTEX_BUFFERS_FIRST_BIND_SLOT).
//
// The result is cached in vertex_attributes_mask. A cached value of zero is
// treated as "not computed yet", so a program that declares no such inputs
// rescans its uniform list on every call.
u64 program::get_vertex_input_attributes_mask()
{
	if (vertex_attributes_mask == 0)
	{
		for (auto &input : uniforms)
		{
			// Skip anything that is not a vertex-stage texel buffer.
			if (input.domain != program_domain::glsl_vertex_program ||
				input.type != program_input_type::input_type_texel_buffer)
			{
				continue;
			}

			// Convert the descriptor binding slot back into an attribute index.
			const auto attrib_index = input.location - VERTEX_BUFFERS_FIRST_BIND_SLOT;
			vertex_attributes_mask |= (1ull << attrib_index);
		}
	}

	return vertex_attributes_mask;
}
}
}

View File

@ -259,6 +259,9 @@ namespace
void operator()(const rsx::vertex_array_buffer& vertex_array)
{
if (!m_program->has_uniform(s_reg_table[vertex_array.index]))
return;
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
@ -284,6 +287,9 @@ namespace
void operator()(const rsx::vertex_array_register& vertex_register)
{
if (!m_program->has_uniform(s_reg_table[vertex_register.index]))
return;
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);
@ -316,11 +322,10 @@ namespace
void operator()(const rsx::empty_vertex_array& vbo)
{
size_t offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(32);
void *dst = m_attrib_ring_info.map(offset_in_attrib_buffer, 32);
memset(dst, 0, 32);
m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R32_SFLOAT, offset_in_attrib_buffer, 32));
if (!m_program->has_uniform(s_reg_table[vbo.index]))
return;
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[vbo.index], descriptor_sets);
}
@ -476,28 +481,35 @@ namespace
for (int i = 0; i < vertex_buffers.size(); ++i)
{
const auto &vbo = vertex_buffers[i];
bool can_multithread = false;
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && rsxthr->vertex_upload_task_ready())
{
//vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>();
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
if (v.attribute_size > 1)
{
can_multithread = true;
u32 element_size = rsx::get_vertex_type_size_on_host(v.type, v.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(v.type, v.attribute_size);
u32 upload_size = real_element_size * vertex_count;
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
u32 upload_size = real_element_size * vertex_count;
size_t offset = m_attrib_ring_info.alloc<256>(upload_size);
memory_allocations.push_back(offset);
allocated_sizes.push_back(upload_size);
upload_jobs.push_back(i);
memory_allocations.push_back(offset);
allocated_sizes.push_back(upload_size);
upload_jobs.push_back(i);
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
const VkFormat format = vk::get_suitable_vk_format(v.type, v.attribute_size);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, format, offset, upload_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[v.index], m_descriptor_sets);
}
}
else
if (!can_multithread)
std::apply_visitor(visitor, vbo);
}
@ -595,13 +607,16 @@ namespace
sizeof(u32)) /
stride;
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
if (!m_program->has_uniform(s_reg_table[index])) continue;
if (!vertex_info.size()) // disabled
{
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device, m_attrib_ring_info.heap->value, VK_FORMAT_R8G8B8A8_UNORM, 0, 0));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
continue;
}
@ -624,8 +639,7 @@ namespace
// TODO: properly handle cmp type
if (vertex_info.type() == rsx::vertex_base_type::cmp)
LOG_ERROR(
RSX, "Compressed vertex attributes not supported for inlined arrays yet");
LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");
switch (vertex_info.type())
{
@ -668,6 +682,6 @@ VKGSRender::upload_vertex_data()
{
draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
descriptor_sets, m_buffer_view_to_clean,
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range);}, this);
[this](const auto& state, const auto& range) { return this->get_vertex_buffers(state, range, m_program->get_vertex_input_attributes_mask());}, this);
return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
}

View File

@ -38,7 +38,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "};\n";
vk::glsl::program_input in;
in.location = 0;
in.location = SCALE_OFFSET_BIND_SLOT;
in.domain = vk::glsl::glsl_vertex_program;
in.name = "ScaleOffsetBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
@ -64,7 +64,6 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
std::sort(input_data.begin(), input_data.end());
int location = 2;
for (const std::tuple<size_t, std::string> item : input_data)
{
for (const ParamType &PT : inputs)
@ -74,7 +73,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
if (PI.name == std::get<1>(item))
{
vk::glsl::program_input in;
in.location = location;
in.location = (int)std::get<0>(item) + VERTEX_BUFFERS_FIRST_BIND_SLOT;
in.domain = vk::glsl::glsl_vertex_program;
in.name = PI.name + "_buffer";
in.type = vk::glsl::input_type_texel_buffer;
@ -92,7 +91,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
}
std::string samplerType = is_int ? "isamplerBuffer" : "samplerBuffer";
OS << "layout(set = 0, binding=" << 3 + location++ << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
OS << "layout(set = 0, binding=" << in.location << ")" << " uniform " << samplerType << " " << PI.name << "_buffer;\n";
}
}
}
@ -108,17 +107,15 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
OS << "};\n\n";
vk::glsl::program_input in;
in.location = 1;
in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT;
in.domain = vk::glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
//We offset this value by the index of the first fragment texture (19) below
//and allow 16 fragment textures to precede this slot
int location = 16;
int location = VERTEX_TEXTURES_FIRST_BIND_SLOT;
for (const ParamType &PT : constants)
{
for (const ParamItem &PI : PT.items)
@ -137,7 +134,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
inputs.push_back(in);
OS << "layout(set = 0, binding=" << 19 + location++ << ") uniform " << PT.type << " " << PI.name << ";\n";
OS << "layout(set = 0, binding=" << location++ << ") uniform " << PT.type << " " << PI.name << ";\n";
}
}
}