From fdbed734639d818d7ea8d3137d401ede4374f160 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sun, 25 Oct 2020 15:09:39 +0300
Subject: [PATCH] [Vulkan/SPIR-V] Some pipeline layout parts + exec
 conditionals

---
 src/xenia/gpu/spirv_shader_translator.cc   | 233 +++++++++++++++-
 src/xenia/gpu/spirv_shader_translator.h    |  78 ++++++
 .../gpu/vulkan/vulkan_command_processor.cc | 254 +++++++++++++++++-
 .../gpu/vulkan/vulkan_command_processor.h  |  45 ++++
 4 files changed, 607 insertions(+), 3 deletions(-)

diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index 8fa3eb73e..c1b376fc0 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -31,6 +31,9 @@ void SpirvShaderTranslator::Reset() {
 
   main_switch_op_.reset();
   main_switch_next_pc_phi_operands_.clear();
+
+  cf_exec_conditional_merge_ = nullptr;
+  cf_instruction_predicate_merge_ = nullptr;
 }
 
 void SpirvShaderTranslator::StartTranslation() {
@@ -50,12 +53,15 @@ void SpirvShaderTranslator::StartTranslation() {
   type_bool_ = builder_->makeBoolType();
   type_int_ = builder_->makeIntType(32);
   type_int4_ = builder_->makeVectorType(type_int_, 4);
+  type_uint_ = builder_->makeUintType(32);
+  type_uint4_ = builder_->makeVectorType(type_uint_, 4);
   type_float_ = builder_->makeFloatType(32);
   type_float2_ = builder_->makeVectorType(type_float_, 2);
   type_float3_ = builder_->makeVectorType(type_float_, 3);
   type_float4_ = builder_->makeVectorType(type_float_, 4);
 
   const_int_0_ = builder_->makeIntConstant(0);
+  const_uint_0_ = builder_->makeUintConstant(0);
   id_vector_temp_.clear();
   id_vector_temp_.reserve(4);
   for (uint32_t i = 0; i < 4; ++i) {
@@ -71,6 +77,40 @@ void SpirvShaderTranslator::StartTranslation() {
   const_float4_0_ =
       builder_->makeCompositeConstant(type_float4_, id_vector_temp_);
 
+  // Common uniform buffer - bool and loop constants.
+  id_vector_temp_.clear();
+  id_vector_temp_.reserve(2);
+  // 256 bool constants.
+  id_vector_temp_.push_back(builder_->makeArrayType(
+      type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4));
+  // Currently (as of October 24, 2020) makeArrayType only uses the stride to
+  // check if deduplication can be done - the array stride decoration needs to
+  // be applied explicitly.
+  builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
+                          sizeof(uint32_t) * 4);
+  // 32 loop constants.
+  id_vector_temp_.push_back(builder_->makeArrayType(
+      type_uint4_, builder_->makeUintConstant(8), sizeof(uint32_t) * 4));
+  builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
+                          sizeof(uint32_t) * 4);
+  spv::Id type_bool_loop_constants =
+      builder_->makeStructType(id_vector_temp_, "XeBoolLoopConstants");
+  builder_->addMemberName(type_bool_loop_constants, 0, "bool_constants");
+  builder_->addMemberDecoration(type_bool_loop_constants, 0,
+                                spv::DecorationOffset, 0);
+  builder_->addMemberName(type_bool_loop_constants, 1, "loop_constants");
+  builder_->addMemberDecoration(type_bool_loop_constants, 1,
+                                spv::DecorationOffset, sizeof(uint32_t) * 8);
+  builder_->addDecoration(type_bool_loop_constants, spv::DecorationBlock);
+  uniform_bool_loop_constants_ = builder_->createVariable(
+      spv::NoPrecision, spv::StorageClassUniform, type_bool_loop_constants,
+      "xe_uniform_bool_loop_constants");
+  builder_->addDecoration(uniform_bool_loop_constants_,
+                          spv::DecorationDescriptorSet,
+                          int(kDescriptorSetBoolLoopConstants));
+  builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding,
+                          0);
+
   if (IsSpirvVertexOrTessEvalShader()) {
     StartVertexOrTessEvalShaderBeforeMain();
   }
@@ -118,7 +158,7 @@ void SpirvShaderTranslator::StartTranslation() {
   }
 
   // Open the main loop.
-  spv::Block* main_loop_pre_header = builder_->getBuildPoint();
+  spv::Block& main_loop_pre_header = *builder_->getBuildPoint();
   main_loop_header_ = &builder_->makeNewBlock();
   spv::Block& main_loop_body = builder_->makeNewBlock();
   // Added later because the body has nested control flow, but according to the
@@ -142,7 +182,7 @@ void SpirvShaderTranslator::StartTranslation() {
   id_vector_temp_.clear();
   id_vector_temp_.reserve(4);
   id_vector_temp_.push_back(const_int_0_);
-  id_vector_temp_.push_back(main_loop_pre_header->getId());
+  id_vector_temp_.push_back(main_loop_pre_header.getId());
   main_loop_pc_next_ = builder_->getUniqueId();
   id_vector_temp_.push_back(main_loop_pc_next_);
   id_vector_temp_.push_back(main_loop_continue_->getId());
@@ -191,6 +231,8 @@ void SpirvShaderTranslator::StartTranslation() {
 }
 
 std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
+  // Close flow control within the last switch case.
+  CloseExecConditionals();
   bool has_main_switch = !label_addresses().empty();
   // After the final exec (if it happened to be not exece, which would already
   // have a break branch), break from the switch if it exists, or from the
@@ -283,6 +325,12 @@ void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
     // 0 already added in the beginning.
     return;
   }
+
+  assert_false(label_addresses().empty());
+
+  // Close flow control within the previous switch case.
+  CloseExecConditionals();
+
   spv::Function& function = builder_->getBuildPoint()->getParent();
   // Create the next switch case and fallthrough to it.
   spv::Block* new_case = new spv::Block(builder_->getUniqueId(), function);
@@ -299,6 +347,57 @@ void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
   builder_->setBuildPoint(new_case);
 }
 
+void SpirvShaderTranslator::ProcessExecInstructionBegin(
+    const ParsedExecInstruction& instr) {
+  UpdateExecConditionals(instr.type, instr.bool_constant_index,
+                         instr.condition);
+}
+
+void SpirvShaderTranslator::ProcessExecInstructionEnd(
+    const ParsedExecInstruction& instr) {
+  if (instr.is_end) {
+    // Break out of the main switch (if it exists) and the main loop.
+    CloseInstructionPredication();
+    if (!builder_->getBuildPoint()->isTerminated()) {
+      builder_->createBranch(label_addresses().empty() ? main_loop_merge_
+                                                       : main_switch_merge_);
+    }
+  }
+  UpdateExecConditionals(instr.type, instr.bool_constant_index,
+                         instr.condition);
+}
+
+void SpirvShaderTranslator::ProcessJumpInstruction(
+    const ParsedJumpInstruction& instr) {
+  // Treat it like an exec and merge it with execs if possible, since it's an
+  // if too.
+  ParsedExecInstruction::Type type;
+  if (instr.type == ParsedJumpInstruction::Type::kConditional) {
+    type = ParsedExecInstruction::Type::kConditional;
+  } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) {
+    type = ParsedExecInstruction::Type::kPredicated;
+  } else {
+    type = ParsedExecInstruction::Type::kUnconditional;
+  }
+  UpdateExecConditionals(type, instr.bool_constant_index, instr.condition);
+
+  // UpdateExecConditionals may not necessarily close the instruction-level
+  // predicate check (that's not needed if the execs are merged), but here the
+  // instruction itself is on the control flow level, so the predicate check
+  // must be on the control flow level too.
+  CloseInstructionPredication();
+
+  JumpToLabel(instr.target_address);
+}
+
+void SpirvShaderTranslator::EnsureBuildPointAvailable() {
+  if (!builder_->getBuildPoint()->isTerminated()) {
+    return;
+  }
+  spv::Block& new_block = builder_->makeNewBlock();
+  new_block.setUnreachable();
+  builder_->setBuildPoint(&new_block);
+}
+
 void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
   // Create the inputs.
   if (IsSpirvTessEvalShader()) {
@@ -373,5 +472,135 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderAfterMain(
   entry_point->addIdOperand(output_per_vertex_);
 }
 
+void SpirvShaderTranslator::UpdateExecConditionals(
+    ParsedExecInstruction::Type type, uint32_t bool_constant_index,
+    bool condition) {
+  // Check if we can merge the new exec with the previous one, or the jump with
+  // the previous exec. The instruction-level predicate check is also merged in
+  // this case.
+  if (type == ParsedExecInstruction::Type::kConditional) {
+    // Can merge conditional with conditional, as long as the bool constant and
+    // the expected values are the same.
+    if (cf_exec_conditional_merge_ &&
+        cf_exec_bool_constant_or_predicate_ == bool_constant_index &&
+        cf_exec_condition_ == condition) {
+      return;
+    }
+  } else if (type == ParsedExecInstruction::Type::kPredicated) {
+    // Can merge predicated with predicated if the conditions are the same and
+    // the previous exec hasn't modified the predicate register.
+    if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ &&
+        cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate &&
+        cf_exec_condition_ == condition) {
+      return;
+    }
+  } else {
+    // Can merge unconditional with unconditional.
+    assert_true(type == ParsedExecInstruction::Type::kUnconditional);
+    if (!cf_exec_conditional_merge_) {
+      return;
+    }
+  }
+
+  CloseExecConditionals();
+
+  if (type == ParsedExecInstruction::Type::kUnconditional) {
+    return;
+  }
+
+  EnsureBuildPointAvailable();
+  spv::Id condition_id;
+  if (type == ParsedExecInstruction::Type::kConditional) {
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(3);
+    // Bool constants (member 0).
+    id_vector_temp_.push_back(const_int_0_);
+    // 128-bit vector.
+    id_vector_temp_.push_back(
+        builder_->makeIntConstant(int(bool_constant_index >> 7)));
+    // 32-bit scalar of a 128-bit vector.
+    id_vector_temp_.push_back(
+        builder_->makeIntConstant(int((bool_constant_index >> 5) & 3)));
+    spv::Id bool_constant_scalar =
+        builder_->createLoad(builder_->createAccessChain(
+                                 spv::StorageClassUniform,
+                                 uniform_bool_loop_constants_, id_vector_temp_),
+                             spv::NoPrecision);
+    condition_id = builder_->createBinOp(
+        spv::OpINotEqual, type_bool_,
+        builder_->createBinOp(
+            spv::OpBitwiseAnd, type_uint_, bool_constant_scalar,
+            builder_->makeUintConstant(uint32_t(1)
+                                       << (bool_constant_index & 31))),
+        const_uint_0_);
+    cf_exec_bool_constant_or_predicate_ = bool_constant_index;
+  } else if (type == ParsedExecInstruction::Type::kPredicated) {
+    condition_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision);
+    cf_exec_bool_constant_or_predicate_ = kCfExecBoolConstantPredicate;
+  } else {
+    assert_unhandled_case(type);
+    return;
+  }
+  cf_exec_condition_ = condition;
+  spv::Function& function = builder_->getBuildPoint()->getParent();
+  cf_exec_conditional_merge_ =
+      new spv::Block(builder_->getUniqueId(), function);
+  {
+    std::unique_ptr<spv::Instruction> selection_merge_op =
+        std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
+    selection_merge_op->addIdOperand(cf_exec_conditional_merge_->getId());
+    selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone);
+    builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op));
+  }
+  spv::Block& inner_block = builder_->makeNewBlock();
+  builder_->createConditionalBranch(
+      condition_id, condition ? &inner_block : cf_exec_conditional_merge_,
+      condition ? cf_exec_conditional_merge_ : &inner_block);
+  builder_->setBuildPoint(&inner_block);
+}
+
+void SpirvShaderTranslator::CloseInstructionPredication() {
+  if (!cf_instruction_predicate_merge_) {
+    return;
+  }
+  spv::Block& inner_block = *builder_->getBuildPoint();
+  if (!inner_block.isTerminated()) {
+    builder_->createBranch(cf_instruction_predicate_merge_);
+  }
+  inner_block.getParent().addBlock(cf_instruction_predicate_merge_);
+  builder_->setBuildPoint(cf_instruction_predicate_merge_);
+  cf_instruction_predicate_merge_ = nullptr;
+}
+
+void SpirvShaderTranslator::CloseExecConditionals() {
+  // Within the exec - instruction-level predicate check.
+  CloseInstructionPredication();
+  // Exec level.
+  if (cf_exec_conditional_merge_) {
+    spv::Block& inner_block = *builder_->getBuildPoint();
+    if (!inner_block.isTerminated()) {
+      builder_->createBranch(cf_exec_conditional_merge_);
+    }
+    inner_block.getParent().addBlock(cf_exec_conditional_merge_);
+    builder_->setBuildPoint(cf_exec_conditional_merge_);
+    cf_exec_conditional_merge_ = nullptr;
+  }
+  // Nothing relies on the predicate value being unchanged now.
+  cf_exec_predicate_written_ = false;
+}
+
+void SpirvShaderTranslator::JumpToLabel(uint32_t address) {
+  assert_false(label_addresses().empty());
+  spv::Block& origin_block = *builder_->getBuildPoint();
+  if (origin_block.isTerminated()) {
+    // Unreachable jump for some reason.
+    return;
+  }
+  main_switch_next_pc_phi_operands_.push_back(
+      builder_->makeIntConstant(int(address)));
+  main_switch_next_pc_phi_operands_.push_back(origin_block.getId());
+  builder_->createBranch(main_loop_continue_);
+}
+
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index e8ca2fee9..943385f20 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -23,6 +23,29 @@ namespace gpu {
 
 class SpirvShaderTranslator : public ShaderTranslator {
  public:
+  enum DescriptorSet : uint32_t {
+    // In order of update frequency.
+    // Very frequently changed, especially for UI draws, and for models drawn
+    // in multiple parts - contains vertex and texture fetch constants.
+    kDescriptorSetFetchConstants,
+    // Quite frequently changed (for one object drawn multiple times, for
+    // instance - may contain projection matrices).
+    kDescriptorSetFloatConstantsVertex,
+    // Less frequently changed (per-material).
+    kDescriptorSetFloatConstantsPixel,
+    // Per-material, combined images and samplers.
+    kDescriptorSetTexturesPixel,
+    // Rarely used at all, but may be changed at an unpredictable rate when
+    // vertex textures are used, combined images and samplers.
+    kDescriptorSetTexturesVertex,
+    // May stay the same across many draws.
+    kDescriptorSetSystemConstants,
+    // Pretty rarely used and rarely changed - flow control constants.
+    kDescriptorSetBoolLoopConstants,
+    // Never changed.
+    kDescriptorSetSharedMemoryAndEdram,
+    kDescriptorSetCount,
+  };
+
   SpirvShaderTranslator(bool supports_clip_distance = true,
                         bool supports_cull_distance = true);
 
@@ -35,6 +58,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
 
   void ProcessLabel(uint32_t cf_index) override;
 
+  void ProcessExecInstructionBegin(
+      const ParsedExecInstruction& instr) override;
+  void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
+  void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
+
  private:
   // TODO(Triang3l): Depth-only pixel shader.
   bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); }
@@ -48,11 +75,34 @@ class SpirvShaderTranslator : public ShaderTranslator {
   }
   bool IsSpirvFragmentShader() const { return is_pixel_shader(); }
 
+  // Must be called before emitting any non-control-flow SPIR-V operations in
+  // translator callbacks: if the build point was terminated by something like
+  // OpBranch, a new unreachable block is created to receive the following
+  // instructions.
+  void EnsureBuildPointAvailable();
+
   void StartVertexOrTessEvalShaderBeforeMain();
   void StartVertexOrTessEvalShaderInMain();
   void CompleteVertexOrTessEvalShaderInMain();
   void CompleteVertexOrTessEvalShaderAfterMain(spv::Instruction* entry_point);
 
+  // Updates the current flow control condition (to be called at the beginning
+  // of an exec and in jumps), closing the previous conditionals if needed.
+  // However, if the condition stays the same (the execs are merged), the
+  // instruction-level predicate conditional won't be closed either - close it
+  // separately if needed (for example, in jumps).
+  void UpdateExecConditionals(ParsedExecInstruction::Type type,
+                              uint32_t bool_constant_index, bool condition);
+  // Closes the instruction-level predicate conditional if it's open - useful
+  // when a control flow instruction needs to emit code that must respect the
+  // current exec conditional, but can't itself be predicated.
+  void CloseInstructionPredication();
+  // Closes the conditionals opened by execs and by instructions within them
+  // (but not by labels), and updates the state accordingly.
+  void CloseExecConditionals();
+  // Sets the next iteration's program counter value (adding it to the phi
+  // operands) and closes the current block.
+  void JumpToLabel(uint32_t address);
+
   bool supports_clip_distance_;
   bool supports_cull_distance_;
 
@@ -68,6 +118,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id type_int_;
   spv::Id type_int4_;
   spv::Id type_uint_;
+  spv::Id type_uint4_;
   spv::Id type_float_;
   spv::Id type_float2_;
   spv::Id type_float3_;
@@ -75,9 +126,12 @@ class SpirvShaderTranslator : public ShaderTranslator {
 
   spv::Id const_int_0_;
   spv::Id const_int4_0_;
+  spv::Id const_uint_0_;
   spv::Id const_float_0_;
   spv::Id const_float4_0_;
 
+  spv::Id uniform_bool_loop_constants_;
+
   // VS as VS only - int.
   spv::Id input_vertex_index_;
   // VS as TES only - int.
@@ -111,6 +165,30 @@ class SpirvShaderTranslator : public ShaderTranslator {
   std::unique_ptr<spv::Instruction> main_switch_op_;
   spv::Block* main_switch_merge_;
   std::vector<spv::Id> main_switch_next_pc_phi_operands_;
+
+  // If the exec bool constant / predicate conditional is open, the block after
+  // it (not added to the function yet).
+  spv::Block* cf_exec_conditional_merge_;
+  // If the instruction-level predicate conditional is open, the block after it
+  // (not added to the function yet).
+  spv::Block* cf_instruction_predicate_merge_;
+  // When cf_exec_conditional_merge_ is not null:
+  // If the current exec conditional is based on a bool constant: the number of
+  // the bool constant.
+  // If it's based on the predicate value: kCfExecBoolConstantPredicate.
+  uint32_t cf_exec_bool_constant_or_predicate_;
+  static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX;
+  // When cf_exec_conditional_merge_ is not null, the expected bool constant or
+  // predicate value for the current exec conditional.
+  bool cf_exec_condition_;
+  // When cf_instruction_predicate_merge_ is not null, the expected predicate
+  // value for the current or the last instruction.
+  bool cf_instruction_predicate_condition_;
+  // Whether there was a `setp` in the current exec before the current
+  // instruction - in this case, the instruction-level predicate value may
+  // differ from the exec-level one, and two execs with the same predicate
+  // condition can't be merged anymore.
+  bool cf_exec_predicate_written_;
 };
 
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index 0fe4e0255..aa351bf81 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -14,8 +14,9 @@
 #include "xenia/base/assert.h"
 #include "xenia/base/logging.h"
+#include "xenia/base/math.h"
 #include "xenia/base/profiling.h"
-#include "xenia/gpu/vulkan/deferred_command_buffer.h"
+#include "xenia/gpu/spirv_shader_translator.h"
 #include "xenia/gpu/vulkan/vulkan_shared_memory.h"
 #include "xenia/ui/vulkan/vulkan_context.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"
@@ -43,6 +44,76 @@ bool VulkanCommandProcessor::SetupContext() {
     return false;
   }
 
+  const ui::vulkan::VulkanProvider& provider =
+      GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
+  descriptor_set_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+  descriptor_set_layout_create_info.pNext = nullptr;
+  descriptor_set_layout_create_info.flags = 0;
+  descriptor_set_layout_create_info.bindingCount = 0;
+  descriptor_set_layout_create_info.pBindings = nullptr;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_empty_) != VK_SUCCESS) {
+    XELOGE("Failed to create an empty Vulkan descriptor set layout");
+    return false;
+  }
+
+  VkShaderStageFlags shader_stages_guest_vertex =
+      GetGuestVertexShaderStageFlags();
+  VkDescriptorSetLayoutBinding descriptor_set_layout_binding_uniform_buffer;
+  descriptor_set_layout_binding_uniform_buffer.binding = 0;
+  descriptor_set_layout_binding_uniform_buffer.descriptorType =
+      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+  descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1;
+  descriptor_set_layout_binding_uniform_buffer.stageFlags =
+      shader_stages_guest_vertex;
+  descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr;
+  descriptor_set_layout_create_info.bindingCount = 1;
+  descriptor_set_layout_create_info.pBindings =
+      &descriptor_set_layout_binding_uniform_buffer;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_uniform_buffer_guest_vertex_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan descriptor set layout for a uniform buffer "
+        "accessible by guest vertex shaders");
+    return false;
+  }
+  descriptor_set_layout_binding_uniform_buffer.stageFlags =
+      VK_SHADER_STAGE_FRAGMENT_BIT;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan descriptor set layout for a uniform buffer "
+        "accessible by guest pixel shaders");
+    return false;
+  }
+  descriptor_set_layout_binding_uniform_buffer.stageFlags =
+      shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_uniform_buffer_guest_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan descriptor set layout for a uniform buffer "
+        "accessible by guest shaders");
+    return false;
+  }
+
   shared_memory_ =
       std::make_unique<VulkanSharedMemory>(*this, *memory_, trace_writer_);
   if (!shared_memory_->Initialize()) {
@@ -63,6 +134,30 @@ void VulkanCommandProcessor::ShutdownContext() {
   const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
   VkDevice device = provider.device();
 
+  for (const auto& pipeline_layout_pair : pipeline_layouts_) {
+    dfn.vkDestroyPipelineLayout(
+        device, pipeline_layout_pair.second.pipeline_layout, nullptr);
+  }
+  pipeline_layouts_.clear();
+  for (const auto& descriptor_set_layout_pair :
+       descriptor_set_layouts_textures_) {
+    dfn.vkDestroyDescriptorSetLayout(device, descriptor_set_layout_pair.second,
+                                     nullptr);
+  }
+  descriptor_set_layouts_textures_.clear();
+
+  ui::vulkan::util::DestroyAndNullHandle(
+      dfn.vkDestroyDescriptorSetLayout, device,
+      descriptor_set_layout_uniform_buffer_guest_);
+  ui::vulkan::util::DestroyAndNullHandle(
+      dfn.vkDestroyDescriptorSetLayout, device,
+      descriptor_set_layout_uniform_buffer_guest_pixel_);
+  ui::vulkan::util::DestroyAndNullHandle(
+      dfn.vkDestroyDescriptorSetLayout, device,
+      descriptor_set_layout_uniform_buffer_guest_vertex_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
+                                         device, descriptor_set_layout_empty_);
+
   sparse_bind_wait_stage_mask_ = 0;
   sparse_buffer_binds_.clear();
   sparse_memory_binds_.clear();
@@ -141,6 +236,152 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
   EndSubmission(true);
 }
 
+bool VulkanCommandProcessor::GetPipelineLayout(
+    uint32_t texture_count_pixel, uint32_t texture_count_vertex,
+    PipelineLayout& pipeline_layout_out) {
+  PipelineLayoutKey pipeline_layout_key;
+  pipeline_layout_key.texture_count_pixel = texture_count_pixel;
+  pipeline_layout_key.texture_count_vertex = texture_count_vertex;
+  {
+    auto it = pipeline_layouts_.find(pipeline_layout_key.key);
+    if (it != pipeline_layouts_.end()) {
+      pipeline_layout_out = it->second;
+      return true;
+    }
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkDescriptorSetLayout descriptor_set_layout_textures_pixel;
+  if (texture_count_pixel) {
+    TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key;
+    texture_descriptor_set_layout_key.is_vertex = 0;
+    texture_descriptor_set_layout_key.texture_count = texture_count_pixel;
+    auto it = descriptor_set_layouts_textures_.find(
+        texture_descriptor_set_layout_key.key);
+    if (it != descriptor_set_layouts_textures_.end()) {
+      descriptor_set_layout_textures_pixel = it->second;
+    } else {
+      VkDescriptorSetLayoutBinding descriptor_set_layout_binding;
+      descriptor_set_layout_binding.binding = 0;
+      descriptor_set_layout_binding.descriptorType =
+          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+      descriptor_set_layout_binding.descriptorCount = texture_count_pixel;
+      descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+      descriptor_set_layout_binding.pImmutableSamplers = nullptr;
+      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
+      descriptor_set_layout_create_info.sType =
+          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+      descriptor_set_layout_create_info.pNext = nullptr;
+      descriptor_set_layout_create_info.flags = 0;
+      descriptor_set_layout_create_info.bindingCount = 1;
+      descriptor_set_layout_create_info.pBindings =
+          &descriptor_set_layout_binding;
+      if (dfn.vkCreateDescriptorSetLayout(
+              device, &descriptor_set_layout_create_info, nullptr,
+              &descriptor_set_layout_textures_pixel) != VK_SUCCESS) {
+        XELOGE(
+            "Failed to create a Vulkan descriptor set layout for {} combined "
+            "images and samplers for guest pixel shaders",
+            texture_count_pixel);
+        return false;
+      }
+      descriptor_set_layouts_textures_.emplace(
+          texture_descriptor_set_layout_key.key,
+          descriptor_set_layout_textures_pixel);
+    }
+  } else {
+    descriptor_set_layout_textures_pixel = descriptor_set_layout_empty_;
+  }
+
+  VkDescriptorSetLayout descriptor_set_layout_textures_vertex;
+  if (texture_count_vertex) {
+    TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key;
+    texture_descriptor_set_layout_key.is_vertex = 1;
+    texture_descriptor_set_layout_key.texture_count = texture_count_vertex;
+    auto it = descriptor_set_layouts_textures_.find(
+        texture_descriptor_set_layout_key.key);
+    if (it != descriptor_set_layouts_textures_.end()) {
+      descriptor_set_layout_textures_vertex = it->second;
+    } else {
+      VkDescriptorSetLayoutBinding descriptor_set_layout_binding;
+      descriptor_set_layout_binding.binding = 0;
+      descriptor_set_layout_binding.descriptorType =
+          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+      descriptor_set_layout_binding.descriptorCount = texture_count_vertex;
+      descriptor_set_layout_binding.stageFlags =
+          GetGuestVertexShaderStageFlags();
+      descriptor_set_layout_binding.pImmutableSamplers = nullptr;
+      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
+      descriptor_set_layout_create_info.sType =
+          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+      descriptor_set_layout_create_info.pNext = nullptr;
+      descriptor_set_layout_create_info.flags = 0;
+      descriptor_set_layout_create_info.bindingCount = 1;
+      descriptor_set_layout_create_info.pBindings =
+          &descriptor_set_layout_binding;
+      if (dfn.vkCreateDescriptorSetLayout(
+              device, &descriptor_set_layout_create_info, nullptr,
+              &descriptor_set_layout_textures_vertex) != VK_SUCCESS) {
+        XELOGE(
+            "Failed to create a Vulkan descriptor set layout for {} combined "
+            "images and samplers for guest vertex shaders",
+            texture_count_vertex);
+        return false;
+      }
+      descriptor_set_layouts_textures_.emplace(
+          texture_descriptor_set_layout_key.key,
+          descriptor_set_layout_textures_vertex);
+    }
+  } else {
+    descriptor_set_layout_textures_vertex = descriptor_set_layout_empty_;
+  }
+
+  VkDescriptorSetLayout
+      descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount];
+  // Fill any unused set layouts with empty layouts.
+  // TODO(Triang3l): Remove this.
+  for (size_t i = 0; i < xe::countof(descriptor_set_layouts); ++i) {
+    descriptor_set_layouts[i] = descriptor_set_layout_empty_;
+  }
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
+      descriptor_set_layout_textures_pixel;
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
+      descriptor_set_layout_textures_vertex;
+
+  VkPipelineLayoutCreateInfo pipeline_layout_create_info;
+  pipeline_layout_create_info.sType =
+      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+  pipeline_layout_create_info.pNext = nullptr;
+  pipeline_layout_create_info.flags = 0;
+  pipeline_layout_create_info.setLayoutCount =
+      uint32_t(xe::countof(descriptor_set_layouts));
+  pipeline_layout_create_info.pSetLayouts = descriptor_set_layouts;
+  pipeline_layout_create_info.pushConstantRangeCount = 0;
+  pipeline_layout_create_info.pPushConstantRanges = nullptr;
+  VkPipelineLayout pipeline_layout;
+  if (dfn.vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr,
+                                 &pipeline_layout) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan pipeline layout for guest drawing with {} "
+        "pixel shader and {} vertex shader textures",
+        texture_count_pixel, texture_count_vertex);
+    return false;
+  }
+  PipelineLayout pipeline_layout_entry;
+  pipeline_layout_entry.pipeline_layout = pipeline_layout;
+  pipeline_layout_entry.descriptor_set_layout_textures_pixel_ref =
+      descriptor_set_layout_textures_pixel;
+  pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref =
+      descriptor_set_layout_textures_vertex;
+  pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry);
+  pipeline_layout_out = pipeline_layout_entry;
+  return true;
+}
+
 Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type,
                                            uint32_t guest_address,
                                            const uint32_t* host_address,
@@ -545,6 +786,17 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
   return true;
 }
 
+VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
+    const {
+  VkShaderStageFlags stages = VK_SHADER_STAGE_VERTEX_BIT;
+  const ui::vulkan::VulkanProvider& provider =
+      GetVulkanContext().GetVulkanProvider();
+  if (provider.device_features().tessellationShader) {
+    stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+  }
+  return stages;
+}
+
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h
index 036c391b1..b00cab90a 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.h
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h
@@ -12,6 +12,7 @@
 #include <cstdint>
 #include <memory>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -64,6 +65,15 @@ class VulkanCommandProcessor : public CommandProcessor {
                         const VkSparseMemoryBind* binds,
                         VkPipelineStageFlags wait_stage_mask);
 
+  struct PipelineLayout {
+    VkPipelineLayout pipeline_layout;
+    VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref;
+    VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref;
+  };
+  bool GetPipelineLayout(uint32_t texture_count_pixel,
+                         uint32_t texture_count_vertex,
+                         PipelineLayout& pipeline_layout_out);
+
 protected:
   bool SetupContext() override;
   void ShutdownContext() override;
@@ -105,6 +115,8 @@ class VulkanCommandProcessor : public CommandProcessor {
     return !submission_open_ && submissions_in_flight_fences_.empty();
   }
 
+  VkShaderStageFlags GetGuestVertexShaderStageFlags() const;
+
   bool cache_clear_requested_ = false;
 
   std::vector<VkFence> fences_free_;
@@ -150,6 +162,39 @@ class VulkanCommandProcessor : public CommandProcessor {
   std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
   VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
 
+  // Common descriptor set layouts, usable by anything that may need them.
+  VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_vertex_ =
+      VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_pixel_ =
+      VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_ =
+      VK_NULL_HANDLE;
+
+  union TextureDescriptorSetLayoutKey {
+    struct {
+      uint32_t is_vertex : 1;
+      // For 0, use descriptor_set_layout_empty_ instead as these are owning
+      // references.
+      uint32_t texture_count : 31;
+    };
+    uint32_t key = 0;
+  };
+  // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout.
+  std::unordered_map<uint32_t, VkDescriptorSetLayout>
+      descriptor_set_layouts_textures_;
+  union PipelineLayoutKey {
+    struct {
+      // Pixel textures in the low bits since those are varied much more
+      // commonly.
+      uint32_t texture_count_pixel : 16;
+      uint32_t texture_count_vertex : 16;
+    };
+    uint32_t key = 0;
+  };
+  // PipelineLayoutKey::key -> PipelineLayout.
+  std::unordered_map<uint32_t, PipelineLayout> pipeline_layouts_;
+
   std::unique_ptr<VulkanSharedMemory> shared_memory_;
 };
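
Notes:

The XeBoolLoopConstants block created in StartTranslation has a fixed layout:
two uvec4s of bool constants (256 bits) followed by eight uvec4s of loop
constants, both with a 16-byte array stride, and the loop constants at byte
offset 32. A minimal host-side mirror of that layout (a sketch for
illustration; the struct name and its use on the upload path are assumptions,
not part of this patch):

    #include <cstddef>
    #include <cstdint>

    // Host-side mirror of the XeBoolLoopConstants uniform block:
    // member 0 at offset 0, member 1 at offset sizeof(uint32_t) * 8 = 32,
    // both arrays with an ArrayStride of sizeof(uint32_t) * 4 = 16.
    struct XeBoolLoopConstants {
      uint32_t bool_constants[2][4];  // 256 one-bit bool constants.
      uint32_t loop_constants[8][4];  // 32 loop constants.
    };
    static_assert(sizeof(XeBoolLoopConstants) == 160, "2 + 8 uvec4s");
    static_assert(offsetof(XeBoolLoopConstants, loop_constants) == 32,
                  "matches the Offset decoration of member 1");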
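
UpdateExecConditionals addresses a single bool constant with three indices:
the uvec4 holding it, the 32-bit scalar within that uvec4, and the bit within
that scalar. The same math in standalone form (a sketch; the names are
illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    struct BoolConstantAddress {
      uint32_t vector;     // Which uvec4 of bool_constants[2] (128 bits each).
      uint32_t component;  // Which 32-bit scalar of that uvec4.
      uint32_t bit;        // Which bit of that scalar.
    };

    BoolConstantAddress AddressBoolConstant(uint32_t bool_constant_index) {
      assert(bool_constant_index < 256);
      return {bool_constant_index >> 7, (bool_constant_index >> 5) & 3,
              bool_constant_index & 31};
    }

For example, bool constant 200 is vector 1, component 2, bit 8, since
200 = 1 * 128 + 2 * 32 + 8.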
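
The merging rules in UpdateExecConditionals boil down to three cases; this
self-contained mirror of the decision (simplified names - the real code uses
ParsedExecInstruction::Type and the cf_exec_* members) may help when reading
the function:

    #include <cstdint>

    enum class ExecType { kUnconditional, kConditional, kPredicated };
    constexpr uint32_t kPredicateSentinel = UINT32_MAX;

    // Returns true if the new exec (or jump) can reuse the currently open
    // exec conditional instead of closing it and opening a new one.
    bool CanMergeWithOpenConditional(
        bool conditional_open, uint32_t open_bool_constant_or_predicate,
        bool open_condition, bool predicate_written, ExecType type,
        uint32_t bool_constant_index, bool condition) {
      switch (type) {
        case ExecType::kConditional:
          // Same bool constant and same expected value.
          return conditional_open &&
                 open_bool_constant_or_predicate == bool_constant_index &&
                 open_condition == condition;
        case ExecType::kPredicated:
          // Same expected predicate value, and no setp since it was checked.
          return conditional_open && !predicate_written &&
                 open_bool_constant_or_predicate == kPredicateSentinel &&
                 open_condition == condition;
        default:
          // Unconditional merges with unconditional: nothing may be open.
          return !conditional_open;
      }
    }

When two execs merge, their instructions share a single if block, so no extra
branches are emitted between them.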
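
JumpToLabel works together with the main switch: the OpPhi at the loop header
picks the next program counter from the (value, predecessor block) pairs
collected in main_switch_next_pc_phi_operands_. Rendered as C++, the generated
control flow looks roughly like this (a conceptual sketch, not literal
translator output):

    int RunMainLoopSketch() {
      int pc = 0;           // The OpPhi'd program counter.
      for (;;) {
        switch (pc) {       // OpSwitch over the label addresses.
          case 0:
            // ...translated instructions of case 0...
            pc = 3;         // JumpToLabel(3): add a phi operand pair and
            continue;       // branch to the loop continue block.
          case 3:
            // ...translated instructions of case 3...
            break;          // exece: branch to the switch merge.
        }
        break;              // Switch merge falls through to the loop merge.
      }
      return pc;
    }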
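
GetPipelineLayout caches by a 32-bit key packing the two texture counts, so
each layout (and the texture descriptor set layouts it references) is created
once per combination. A hypothetical call site (the draw path that would do
this is not part of this patch, and BindForDraw is an invented name):

    // Hypothetical helper inside VulkanCommandProcessor, for illustration.
    bool VulkanCommandProcessor::BindForDraw() {
      PipelineLayout pipeline_layout;
      if (!GetPipelineLayout(/* texture_count_pixel */ 2,
                             /* texture_count_vertex */ 0, pipeline_layout)) {
        return false;  // Creation failed; drawing can't proceed.
      }
      // pipeline_layout.pipeline_layout is usable for pipeline creation and
      // vkCmdBindDescriptorSets; the *_ref set layouts remain owned by the
      // descriptor_set_layouts_textures_ cache - don't destroy them here.
      return true;
    }

Requesting (2, 0) again returns the same cached VkPipelineLayout.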