vk: Generate dynamic binding table based on the capability of the drivers

- This relaxes the constraints that were imposed on all shaders just so they could run on some not-so-great platforms; the binding layout now follows what the driver actually supports.
This commit is contained in:
kd-11 2020-01-08 19:30:35 +03:00 committed by kd-11
parent 4a5559ee65
commit db5d03c340
8 changed files with 80 additions and 70 deletions

View File

@ -116,7 +116,7 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS)
void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
{
u32 location = TEXTURES_FIRST_BIND_SLOT;
u32 location = m_binding_table.textures_first_bind_slot;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{
if (PT.type != "sampler1D" &&
@ -166,9 +166,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
}
// Some drivers (macOS) do not support more than 16 texture descriptors per stage
// TODO: If any application requires more than this, the layout can be restructured a bit
verify("Too many sampler descriptors!" HERE), location <= VERTEX_TEXTURES_FIRST_BIND_SLOT;
verify("Too many sampler descriptors!" HERE), location <= m_binding_table.vertex_textures_first_bind_slot;
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
@ -211,17 +209,17 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << "};\n\n";
vk::glsl::program_input in;
in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
in.location = m_binding_table.fragment_constant_buffers_bind_slot;
in.domain = glsl::glsl_fragment_program;
in.name = "FragmentConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = FRAGMENT_STATE_BIND_SLOT;
in.location = m_binding_table.fragment_state_bind_slot;
in.name = "FragmentStateBuffer";
inputs.push_back(in);
in.location = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
in.location = m_binding_table.fragment_texture_params_bind_slot;
in.name = "TextureParametersBuffer";
inputs.push_back(in);
}
@ -364,6 +362,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS)
// Decompiles the fragment program into m_shader and passes the collected
// program inputs to vk_prog.
void VKFragmentDecompilerThread::Task()
{
// Capture the current renderer's binding table before decompiling:
// insertConstants() reads m_binding_table for its bind slots
// (presumably it is invoked from within Decompile() - confirm).
m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
m_shader = Decompile();
// Hand over the entries accumulated in `inputs`.
vk_prog->SetInputs(inputs);
}

View File

@ -12,6 +12,7 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler
std::vector<vk::glsl::program_input> inputs;
class VKFragmentProgram *vk_prog;
glsl::shader_properties m_shader_props{};
vk::pipeline_binding_table m_binding_table{};
public:
VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst)

View File

@ -238,7 +238,8 @@ namespace
{
std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev)
{
std::array<VkDescriptorSetLayoutBinding, VK_NUM_DESCRIPTOR_BINDINGS> bindings = {};
const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
std::vector<VkDescriptorSetLayoutBinding> bindings(binding_table.total_descriptor_bindings);
size_t idx = 0;
@ -248,58 +249,60 @@ namespace
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = VERTEX_BUFFERS_FIRST_BIND_SLOT + i;
bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i;
idx++;
}
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_STATE_BIND_SLOT;
bindings[idx].binding = binding_table.fragment_state_bind_slot;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
bindings[idx].binding = binding_table.fragment_texture_params_bind_slot;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = VERTEX_CONSTANT_BUFFERS_BIND_SLOT;
bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = VERTEX_PARAMS_BIND_SLOT;
bindings[idx].binding = binding_table.vertex_params_bind_slot;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = CONDITIONAL_RENDER_PREDICATE_SLOT;
bindings[idx].binding = binding_table.conditional_render_predicate_slot;
idx++;
for (int i = 0; i < rsx::limits::fragment_textures_count; i++)
for (auto binding = binding_table.textures_first_bind_slot;
binding < binding_table.vertex_textures_first_bind_slot;
binding++)
{
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = TEXTURES_FIRST_BIND_SLOT + i;
bindings[idx].binding = binding;
idx++;
}
@ -308,11 +311,11 @@ namespace
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = VERTEX_TEXTURES_FIRST_BIND_SLOT + i;
bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i;
idx++;
}
verify(HERE), idx == VK_NUM_DESCRIPTOR_BINDINGS;
verify(HERE), idx == binding_table.total_descriptor_bindings;
std::array<VkPushConstantRange, 1> push_constants;
push_constants[0].offset = 0;
@ -1105,6 +1108,8 @@ void VKGSRender::emit_geometry(u32 sub_index)
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
bool update_descriptors = false;
const auto& binding_table = m_device->get_pipeline_binding_table();
if (sub_index == 0)
{
update_descriptors = true;
@ -1128,9 +1133,9 @@ void VKGSRender::emit_geometry(u32 sub_index)
{
// Need to update descriptors; make a copy for the next draw
VkDescriptorSet new_descriptor_set = allocate_descriptor_set();
std::array<VkCopyDescriptorSet, VK_NUM_DESCRIPTOR_BINDINGS> copy_set;
std::vector<VkCopyDescriptorSet> copy_set(binding_table.total_descriptor_bindings);
for (u32 n = 0; n < VK_NUM_DESCRIPTOR_BINDINGS; ++n)
for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n)
{
copy_set[n] =
{
@ -1146,7 +1151,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
};
}
vkUpdateDescriptorSets(*m_device, 0, 0, VK_NUM_DESCRIPTOR_BINDINGS, copy_set.data());
vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data());
m_current_frame->descriptor_set = new_descriptor_set;
update_descriptors = true;
@ -1158,9 +1163,9 @@ void VKGSRender::emit_geometry(u32 sub_index)
verify(HERE), m_vertex_layout_storage;
if (update_descriptors)
{
m_program->bind_uniform(persistent_buffer, VERTEX_BUFFERS_FIRST_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, VERTEX_BUFFERS_FIRST_BIND_SLOT + 1, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_layout_storage->value, VERTEX_BUFFERS_FIRST_BIND_SLOT + 2, m_current_frame->descriptor_set);
m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set);
}
if (!m_render_pass_open)
@ -2750,19 +2755,18 @@ void VKGSRender::load_program_env()
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 };
}
//if (1)
{
m_program->bind_uniform(m_vertex_env_buffer_info, VERTEX_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_constants_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_env_buffer_info, FRAGMENT_STATE_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
}
const auto& binding_table = m_device->get_pipeline_binding_table();
m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set);
if (vk::emulate_conditional_rendering())
{
auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer()->value;
m_program->bind_buffer({ predicate, 0, 4 }, CONDITIONAL_RENDER_PREDICATE_SLOT, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
}
//Clear flags

View File

@ -227,6 +227,17 @@ namespace vk
return result;
}
// Builds the binding table for the given physical device.
// All slots keep their in-class defaults except the two that depend on the
// device's sampled-image limit: the first vertex texture slot and the total
// number of descriptor bindings.
pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev)
{
	pipeline_binding_table table{};

	// The driver reports how many sampled images one stage may use; never take more than 32
	const auto sampler_budget = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u);

	// Slots [textures_first_bind_slot, vertex_textures_first_bind_slot) form the first texture range
	table.vertex_textures_first_bind_slot = table.textures_first_bind_slot + sampler_budget;

	// Four more bindings follow, starting at the first vertex texture slot
	table.total_descriptor_bindings = table.vertex_textures_first_bind_slot + 4;

	return table;
}
chip_class get_chip_family(uint32_t vendor_id, uint32_t device_id)
{
if (vendor_id == 0x10DE)

View File

@ -34,18 +34,6 @@
#define DESCRIPTOR_MAX_DRAW_CALLS 16384
#define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS
// Fixed descriptor binding indices. They are written to
// VkDescriptorSetLayoutBinding::binding when the shared pipeline layout is
// built, and passed as the binding point when resources are bound.
#define VERTEX_PARAMS_BIND_SLOT 0
#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 1
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
#define FRAGMENT_STATE_BIND_SLOT 3
#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 4
// First of several consecutive vertex buffer slots; callers also bind at +1 and +2.
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 5
#define CONDITIONAL_RENDER_PREDICATE_SLOT 8
// First fragment-stage sampler slot; the next range starts 16 slots later.
#define TEXTURES_FIRST_BIND_SLOT 9
#define VERTEX_TEXTURES_FIRST_BIND_SLOT (TEXTURES_FIRST_BIND_SLOT + 16)
// Total binding count: everything up to the first vertex texture slot plus 4
// more (presumably the vertex-stage sampler bindings - confirm).
#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4)
#define FRAME_PRESENT_TIMEOUT 10000000ull // 10 seconds
#define GENERAL_WAIT_TIMEOUT 2000000ull // 2 seconds
@ -127,6 +115,7 @@ namespace vk
struct memory_type_mapping;
struct gpu_formats_support;
struct fence;
struct pipeline_binding_table;
const vk::context *get_current_thread_ctx();
void set_current_thread_ctx(const vk::context &ctx);
@ -160,6 +149,7 @@ namespace vk
memory_type_mapping get_memory_mapping(const physical_device& dev);
gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev);
pipeline_binding_table get_pipeline_binding_table(const physical_device& dev);
// Sync helpers around vkQueueSubmit
void acquire_global_submit_lock();
@ -242,6 +232,20 @@ namespace vk
void die_with_error(const char* faulting_addr, VkResult error_code);
// Descriptor binding indices used by the shared pipeline layout.
// The first eight members are fixed slots. The last two are placeholders
// that vk::get_pipeline_binding_table() overwrites from device limits.
struct pipeline_binding_table
{
	// Uniform buffer slots
	u8 vertex_params_bind_slot{0};
	u8 vertex_constant_buffers_bind_slot{1};
	u8 fragment_constant_buffers_bind_slot{2};
	u8 fragment_state_bind_slot{3};
	u8 fragment_texture_params_bind_slot{4};

	// First vertex buffer slot; users also address +1 and +2
	u8 vertex_buffers_first_bind_slot{5};
	u8 conditional_render_predicate_slot{8};

	// Start of the texture range
	u8 textures_first_bind_slot{9};

	// Placeholder value only - not valid until initialized properly
	u8 vertex_textures_first_bind_slot{9};

	// Placeholder value only - not valid until initialized properly
	u8 total_descriptor_bindings{vertex_textures_first_bind_slot};
};
struct memory_type_mapping
{
uint32_t host_visible_coherent;
@ -768,6 +772,7 @@ private:
physical_device *pgpu = nullptr;
memory_type_mapping memory_map{};
gpu_formats_support m_formats_support{};
pipeline_binding_table m_pipeline_binding_table{};
std::unique_ptr<mem_allocator_base> m_allocator;
VkDevice dev = VK_NULL_HANDLE;
@ -917,6 +922,7 @@ private:
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);
if (g_cfg.video.disable_vulkan_mem_allocator)
m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
@ -994,6 +1000,11 @@ private:
return m_formats_support;
}
// Read-only access to the stored binding table (m_pipeline_binding_table).
// The member is assigned from vk::get_pipeline_binding_table(pdev) during
// setup; the result is returned by const reference, not copied.
const pipeline_binding_table& get_pipeline_binding_table() const
{
return m_pipeline_binding_table;
}
const gpu_shader_types_support& get_shader_types_support() const
{
return pgpu->shader_types_support;
@ -3537,8 +3548,6 @@ public:
void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set);
void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set);
u64 get_vertex_input_attributes_mask();
};
}

View File

@ -227,21 +227,5 @@ namespace vk
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << binding_point);
}
// Returns a bitmask of the texel-buffer inputs that belong to the vertex
// program. Bit N is set for an input located at
// VERTEX_BUFFERS_FIRST_BIND_SLOT + N. A non-zero result is cached in
// vertex_attributes_mask and returned directly on later calls.
u64 program::get_vertex_input_attributes_mask()
{
	if (!vertex_attributes_mask)
	{
		for (const auto &texel_input : uniforms[program_input_type::input_type_texel_buffer])
		{
			// Only inputs from the vertex program domain contribute
			if (!(texel_input.domain == program_domain::glsl_vertex_program))
				continue;

			vertex_attributes_mask |= (1ull << (texel_input.location - VERTEX_BUFFERS_FIRST_BIND_SLOT));
		}
	}

	return vertex_attributes_mask;
}
}
}

View File

@ -66,7 +66,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << "};\n\n";
vk::glsl::program_input in;
in.location = VERTEX_PARAMS_BIND_SLOT;
in.location = m_binding_table.vertex_params_bind_slot;;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexContextBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
@ -80,19 +80,19 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
OS << "layout(set=0, binding=7) uniform usamplerBuffer vertex_layout_stream;\n"; // Data stream defining vertex data layout
vk::glsl::program_input in;
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT;
in.location = m_binding_table.vertex_buffers_first_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "persistent_input_stream";
in.type = vk::glsl::input_type_texel_buffer;
this->inputs.push_back(in);
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 1;
in.location = m_binding_table.vertex_buffers_first_bind_slot + 1;
in.domain = glsl::glsl_vertex_program;
in.name = "volatile_input_stream";
in.type = vk::glsl::input_type_texel_buffer;
this->inputs.push_back(in);
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 2;
in.location = m_binding_table.vertex_buffers_first_bind_slot + 2;
in.domain = glsl::glsl_vertex_program;
in.name = "vertex_layout_stream";
in.type = vk::glsl::input_type_texel_buffer;
@ -107,7 +107,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
OS << "};\n\n";
vk::glsl::program_input in;
in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT;
in.location = m_binding_table.vertex_constant_buffers_bind_slot;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
@ -115,7 +115,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
inputs.push_back(in);
u32 location = VERTEX_TEXTURES_FIRST_BIND_SLOT;
u32 location = m_binding_table.vertex_textures_first_bind_slot;
for (const ParamType &PT : constants)
{
for (const ParamItem &PI : PT.items)
@ -310,6 +310,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
void VKVertexDecompilerThread::Task()
{
m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering();
m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table();
m_shader = Decompile();
vk_prog->SetInputs(inputs);

View File

@ -10,6 +10,7 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler
std::string &m_shader;
std::vector<vk::glsl::program_input> inputs;
class VKVertexProgram *vk_prog;
vk::pipeline_binding_table m_binding_table{};
struct
{