diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index a3eb50da7e..97b8bdf395 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -116,7 +116,7 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { - u32 location = TEXTURES_FIRST_BIND_SLOT; + u32 location = m_binding_table.textures_first_bind_slot; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { if (PT.type != "sampler1D" && @@ -166,9 +166,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - // Some drivers (macOS) do not support more than 16 texture descriptors per stage - // TODO: If any application requires more than this, the layout can be restructured a bit - verify("Too many sampler descriptors!" HERE), location <= VERTEX_TEXTURES_FIRST_BIND_SLOT; + verify("Too many sampler descriptors!" HERE), location <= m_binding_table.vertex_textures_first_bind_slot; std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) @@ -211,17 +209,17 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << "};\n\n"; vk::glsl::program_input in; - in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; + in.location = m_binding_table.fragment_constant_buffers_bind_slot; in.domain = glsl::glsl_fragment_program; in.name = "FragmentConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; inputs.push_back(in); - in.location = FRAGMENT_STATE_BIND_SLOT; + in.location = m_binding_table.fragment_state_bind_slot; in.name = "FragmentStateBuffer"; inputs.push_back(in); - in.location = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT; + in.location = m_binding_table.fragment_texture_params_bind_slot; in.name = "TextureParametersBuffer"; inputs.push_back(in); } @@ -364,6 +362,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void 
VKFragmentDecompilerThread::Task() { + m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index d658f4acd9..a4fbb28352 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -12,6 +12,7 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler std::vector inputs; class VKFragmentProgram *vk_prog; glsl::shader_properties m_shader_props{}; + vk::pipeline_binding_table m_binding_table{}; public: VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8450b09eb6..423b381adb 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -238,7 +238,8 @@ namespace { std::tuple get_shared_pipeline_layout(VkDevice dev) { - std::array bindings = {}; + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + std::vector bindings(binding_table.total_descriptor_bindings); size_t idx = 0; @@ -248,58 +249,60 @@ namespace bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = VERTEX_BUFFERS_FIRST_BIND_SLOT + i; + bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i; idx++; } bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; + bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot; idx++; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; 
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = FRAGMENT_STATE_BIND_SLOT; + bindings[idx].binding = binding_table.fragment_state_bind_slot; idx++; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT; + bindings[idx].binding = binding_table.fragment_texture_params_bind_slot; idx++; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; + bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; idx++; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; - bindings[idx].binding = VERTEX_PARAMS_BIND_SLOT; + bindings[idx].binding = binding_table.vertex_params_bind_slot; idx++; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = CONDITIONAL_RENDER_PREDICATE_SLOT; + bindings[idx].binding = binding_table.conditional_render_predicate_slot; idx++; - for (int i = 0; i < rsx::limits::fragment_textures_count; i++) + for (auto binding = binding_table.textures_first_bind_slot; + binding < binding_table.vertex_textures_first_bind_slot; + binding++) { bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = TEXTURES_FIRST_BIND_SLOT + i; + bindings[idx].binding = binding; idx++; } @@ -308,11 +311,11 @@ namespace bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorCount = 1; bindings[idx].stageFlags = 
VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = VERTEX_TEXTURES_FIRST_BIND_SLOT + i; + bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i; idx++; } - verify(HERE), idx == VK_NUM_DESCRIPTOR_BINDINGS; + verify(HERE), idx == binding_table.total_descriptor_bindings; std::array push_constants; push_constants[0].offset = 0; @@ -1105,6 +1108,8 @@ void VKGSRender::emit_geometry(u32 sub_index) auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; bool update_descriptors = false; + const auto& binding_table = m_device->get_pipeline_binding_table(); + if (sub_index == 0) { update_descriptors = true; @@ -1128,9 +1133,9 @@ void VKGSRender::emit_geometry(u32 sub_index) { // Need to update descriptors; make a copy for the next draw VkDescriptorSet new_descriptor_set = allocate_descriptor_set(); - std::array copy_set; + std::vector copy_set(binding_table.total_descriptor_bindings); - for (u32 n = 0; n < VK_NUM_DESCRIPTOR_BINDINGS; ++n) + for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) { copy_set[n] = { @@ -1146,7 +1151,7 @@ void VKGSRender::emit_geometry(u32 sub_index) }; } - vkUpdateDescriptorSets(*m_device, 0, 0, VK_NUM_DESCRIPTOR_BINDINGS, copy_set.data()); + vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data()); m_current_frame->descriptor_set = new_descriptor_set; update_descriptors = true; @@ -1158,9 +1163,9 @@ void VKGSRender::emit_geometry(u32 sub_index) verify(HERE), m_vertex_layout_storage; if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, VERTEX_BUFFERS_FIRST_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, VERTEX_BUFFERS_FIRST_BIND_SLOT + 1, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_layout_storage->value, VERTEX_BUFFERS_FIRST_BIND_SLOT + 2, m_current_frame->descriptor_set); + m_program->bind_uniform(persistent_buffer, 
binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); } if (!m_render_pass_open) @@ -2750,19 +2755,18 @@ void VKGSRender::load_program_env() m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 }; } - //if (1) - { - m_program->bind_uniform(m_vertex_env_buffer_info, VERTEX_PARAMS_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_constants_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_env_buffer_info, FRAGMENT_STATE_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set); - } + const auto& binding_table = m_device->get_pipeline_binding_table(); + + m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); if (vk::emulate_conditional_rendering()) { auto predicate = 
m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer()->value; - m_program->bind_buffer({ predicate, 0, 4 }, CONDITIONAL_RENDER_PREDICATE_SLOT, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); } //Clear flags diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 0492f7c5b0..d515a09f16 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -227,6 +227,17 @@ namespace vk return result; } + pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev) + { + pipeline_binding_table result{}; + + // Need to check how many samplers are supported by the driver + const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u); + result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers; + result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4; + return result; + } + chip_class get_chip_family(uint32_t vendor_id, uint32_t device_id) { if (vendor_id == 0x10DE) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 5a879fb45f..044e25a488 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -34,18 +34,6 @@ #define DESCRIPTOR_MAX_DRAW_CALLS 16384 #define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS -#define VERTEX_PARAMS_BIND_SLOT 0 -#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 1 -#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2 -#define FRAGMENT_STATE_BIND_SLOT 3 -#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 4 -#define VERTEX_BUFFERS_FIRST_BIND_SLOT 5 -#define CONDITIONAL_RENDER_PREDICATE_SLOT 8 -#define TEXTURES_FIRST_BIND_SLOT 9 -#define VERTEX_TEXTURES_FIRST_BIND_SLOT (TEXTURES_FIRST_BIND_SLOT + 16) - -#define VK_NUM_DESCRIPTOR_BINDINGS 
(VERTEX_TEXTURES_FIRST_BIND_SLOT + 4) - #define FRAME_PRESENT_TIMEOUT 10000000ull // 10 seconds #define GENERAL_WAIT_TIMEOUT 2000000ull // 2 seconds @@ -127,6 +115,7 @@ namespace vk struct memory_type_mapping; struct gpu_formats_support; struct fence; + struct pipeline_binding_table; const vk::context *get_current_thread_ctx(); void set_current_thread_ctx(const vk::context &ctx); @@ -160,6 +149,7 @@ namespace vk memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); + pipeline_binding_table get_pipeline_binding_table(const physical_device& dev); // Sync helpers around vkQueueSubmit void acquire_global_submit_lock(); @@ -242,6 +232,20 @@ namespace vk void die_with_error(const char* faulting_addr, VkResult error_code); + struct pipeline_binding_table + { + u8 vertex_params_bind_slot = 0; + u8 vertex_constant_buffers_bind_slot = 1; + u8 fragment_constant_buffers_bind_slot = 2; + u8 fragment_state_bind_slot = 3; + u8 fragment_texture_params_bind_slot = 4; + u8 vertex_buffers_first_bind_slot = 5; + u8 conditional_render_predicate_slot = 8; + u8 textures_first_bind_slot = 9; + u8 vertex_textures_first_bind_slot = 9; // Invalid, has to be initialized properly + u8 total_descriptor_bindings = vertex_textures_first_bind_slot; // Invalid, has to be initialized properly + }; + struct memory_type_mapping { uint32_t host_visible_coherent; @@ -768,6 +772,7 @@ private: physical_device *pgpu = nullptr; memory_type_mapping memory_map{}; gpu_formats_support m_formats_support{}; + pipeline_binding_table m_pipeline_binding_table{}; std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; @@ -917,6 +922,7 @@ private: memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); + m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev); if (g_cfg.video.disable_vulkan_mem_allocator) m_allocator = std::make_unique(dev, pdev); @@ 
-994,6 +1000,11 @@ private: return m_formats_support; } + const pipeline_binding_table& get_pipeline_binding_table() const + { + return m_pipeline_binding_table; + } + const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; @@ -3537,8 +3548,6 @@ public: void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set); void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set); - - u64 get_vertex_input_attributes_mask(); }; } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 7378ca9f10..a480a43d16 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -227,21 +227,5 @@ namespace vk vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); attribute_location_mask |= (1ull << binding_point); } - - u64 program::get_vertex_input_attributes_mask() - { - if (vertex_attributes_mask) - return vertex_attributes_mask; - - for (const auto &uniform : uniforms[program_input_type::input_type_texel_buffer]) - { - if (uniform.domain == program_domain::glsl_vertex_program) - { - vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT)); - } - } - - return vertex_attributes_mask; - } } } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index cde29797bf..2c4417a92d 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -66,7 +66,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << "};\n\n"; vk::glsl::program_input in; - in.location = VERTEX_PARAMS_BIND_SLOT; + in.location = m_binding_table.vertex_params_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "VertexContextBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -80,19
+80,19 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v OS << "layout(set=0, binding=7) uniform usamplerBuffer vertex_layout_stream;\n"; // Data stream defining vertex data layout vk::glsl::program_input in; - in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT; + in.location = m_binding_table.vertex_buffers_first_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "persistent_input_stream"; in.type = vk::glsl::input_type_texel_buffer; this->inputs.push_back(in); - in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 1; + in.location = m_binding_table.vertex_buffers_first_bind_slot + 1; in.domain = glsl::glsl_vertex_program; in.name = "volatile_input_stream"; in.type = vk::glsl::input_type_texel_buffer; this->inputs.push_back(in); - in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT + 2; + in.location = m_binding_table.vertex_buffers_first_bind_slot + 2; in.domain = glsl::glsl_vertex_program; in.name = "vertex_layout_stream"; in.type = vk::glsl::input_type_texel_buffer; @@ -107,7 +107,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std OS << "};\n\n"; vk::glsl::program_input in; - in.location = VERTEX_CONSTANT_BUFFERS_BIND_SLOT; + in.location = m_binding_table.vertex_constant_buffers_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_uniform_buffer; @@ -115,7 +115,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std inputs.push_back(in); - u32 location = VERTEX_TEXTURES_FIRST_BIND_SLOT; + u32 location = m_binding_table.vertex_textures_first_bind_slot; for (const ParamType &PT : constants) { for (const ParamItem &PI : PT.items) @@ -310,6 +310,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) void VKVertexDecompilerThread::Task() { m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering(); + m_binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); 
m_shader = Decompile(); vk_prog->SetInputs(inputs); diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 32f6f66740..393f238563 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -10,6 +10,7 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog; + vk::pipeline_binding_table m_binding_table{}; struct {