diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index 3fbc29220..dd7cac5ba 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -283,47 +283,41 @@ void SpirvShaderTranslator::StartTranslation() {
   }

   // Common storage buffers - shared memory uint[], each 128 MB or larger,
-  // depending on what's possible on the device. glslang generates everything,
-  // including all the types, for each storage buffer separately.
-  uint32_t shared_memory_binding_count =
+  // depending on what's possible on the device.
+  id_vector_temp_.clear();
+  id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
+  // Storage buffers have std430 packing, no padding to 4-component vectors.
+  builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
+                          sizeof(uint32_t));
+  spv::Id type_shared_memory =
+      builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
+  builder_->addMemberName(type_shared_memory, 0, "shared_memory");
+  // TODO(Triang3l): Make writable when memexport is implemented.
+  builder_->addMemberDecoration(type_shared_memory, 0,
+                                spv::DecorationNonWritable);
+  builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset,
+                                0);
+  builder_->addDecoration(type_shared_memory,
+                          features_.spirv_version >= spv::Spv_1_3
+                              ? spv::DecorationBlock
+                              : spv::DecorationBufferBlock);
+  unsigned int shared_memory_binding_count =
       1 << GetSharedMemoryStorageBufferCountLog2();
-  char shared_memory_struct_name[] = "XeSharedMemory0";
-  char shared_memory_buffer_name[] = "xe_shared_memory_0";
-  for (uint32_t i = 0; i < shared_memory_binding_count; ++i) {
-    id_vector_temp_.clear();
-    id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
-    // Storage buffers have std430 packing, no padding to 4-component vectors.
-    builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
-                            sizeof(uint32_t));
-    shared_memory_struct_name[xe::countof(shared_memory_struct_name) - 2] =
-        '0' + i;
-    spv::Id type_shared_memory =
-        builder_->makeStructType(id_vector_temp_, shared_memory_struct_name);
-    builder_->addMemberName(type_shared_memory, 0, "shared_memory");
-    // TODO(Triang3l): Make writable when memexport is implemented.
-    builder_->addMemberDecoration(type_shared_memory, 0,
-                                  spv::DecorationNonWritable);
-    builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset,
-                                  0);
-    builder_->addDecoration(type_shared_memory,
-                            features_.spirv_version >= spv::Spv_1_3
-                                ? spv::DecorationBlock
-                                : spv::DecorationBufferBlock);
-    shared_memory_buffer_name[xe::countof(shared_memory_buffer_name) - 2] =
-        '0' + i;
-    spv::Id buffer_shared_memory = builder_->createVariable(
-        spv::NoPrecision,
-        features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer
-                                                : spv::StorageClassUniform,
-        type_shared_memory, shared_memory_buffer_name);
-    buffers_shared_memory_[i] = buffer_shared_memory;
-    builder_->addDecoration(buffer_shared_memory, spv::DecorationDescriptorSet,
-                            int(kDescriptorSetSharedMemoryAndEdram));
-    builder_->addDecoration(buffer_shared_memory, spv::DecorationBinding,
-                            int(i));
-    if (features_.spirv_version >= spv::Spv_1_4) {
-      main_interface_.push_back(buffer_shared_memory);
-    }
+  if (shared_memory_binding_count > 1) {
+    type_shared_memory = builder_->makeArrayType(
+        type_shared_memory,
+        builder_->makeUintConstant(shared_memory_binding_count), 0);
+  }
+  buffers_shared_memory_ = builder_->createVariable(
+      spv::NoPrecision,
+      features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer
+                                              : spv::StorageClassUniform,
+      type_shared_memory, "xe_shared_memory");
+  builder_->addDecoration(buffers_shared_memory_, spv::DecorationDescriptorSet,
+                          int(kDescriptorSetSharedMemoryAndEdram));
+  builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0);
+  if (features_.spirv_version >= spv::Spv_1_4) {
+    main_interface_.push_back(buffers_shared_memory_);
   }

   if (IsSpirvVertexOrTessEvalShader()) {
@@ -1690,13 +1684,14 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
     id_vector_temp_.push_back(const_int_0_);
     id_vector_temp_.push_back(address_dwords_int);
     return builder_->createLoad(
-        builder_->createAccessChain(storage_class, buffers_shared_memory_[0],
+        builder_->createAccessChain(storage_class, buffers_shared_memory_,
                                     id_vector_temp_),
         spv::NoPrecision);
   }

   // The memory is split into multiple bindings - check which binding to load
-  // from. 29 is log2(512 MB), but addressing in dwords (4 B).
+  // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the
+  // array with the variable itself because it needs VK_EXT_descriptor_indexing.
   uint32_t binding_address_bits = (29 - 2) - buffer_count_log2;
   spv::Id binding_index = builder_->createBinOp(
       spv::OpShiftRightLogical, type_uint_,
@@ -1732,16 +1727,16 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory(
     }
     builder_->getBuildPoint()->addInstruction(std::move(switch_op));
   }
-  // Set up the access chain indices.
-  id_vector_temp_.clear();
-  id_vector_temp_.reserve(2);
-  // The only SSBO struct member.
-  id_vector_temp_.push_back(const_int_0_);
-  id_vector_temp_.push_back(binding_address);
   for (uint32_t i = 0; i < buffer_count; ++i) {
     builder_->setBuildPoint(switch_case_blocks[i]);
+    id_vector_temp_.clear();
+    id_vector_temp_.reserve(3);
+    id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
+    // The only SSBO struct member.
+    id_vector_temp_.push_back(const_int_0_);
+    id_vector_temp_.push_back(binding_address);
     value_phi_op->addIdOperand(builder_->createLoad(
-        builder_->createAccessChain(storage_class, buffers_shared_memory_[i],
+        builder_->createAccessChain(storage_class, buffers_shared_memory_,
                                     id_vector_temp_),
         spv::NoPrecision));
     value_phi_op->addIdOperand(switch_case_blocks[i]->getId());
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index e98df7d2e..bb74d5ab9 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -312,7 +312,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
   spv::Id uniform_bool_loop_constants_;
   spv::Id uniform_fetch_constants_;

-  spv::Id buffers_shared_memory_[512 / 128];
+  spv::Id buffers_shared_memory_;

   // VS as VS only - int.
   spv::Id input_vertex_index_;
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index aa351bf81..4a7f1e5af 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -70,47 +70,81 @@ bool VulkanCommandProcessor::SetupContext() {
       VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
   descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1;
   descriptor_set_layout_binding_uniform_buffer.stageFlags =
-      shader_stages_guest_vertex;
+      shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT;
   descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr;
   descriptor_set_layout_create_info.bindingCount = 1;
   descriptor_set_layout_create_info.pBindings =
       &descriptor_set_layout_binding_uniform_buffer;
   if (dfn.vkCreateDescriptorSetLayout(
           device, &descriptor_set_layout_create_info, nullptr,
-          &descriptor_set_layout_uniform_buffer_guest_vertex_) != VK_SUCCESS) {
+          &descriptor_set_layout_ub_fetch_bool_loop_constants_) != VK_SUCCESS) {
     XELOGE(
-        "Failed to create a Vulkan descriptor set layout for an uniform buffer "
-        "accessible by guest vertex shaders");
+        "Failed to create a Vulkan descriptor set layout for the fetch, bool "
+        "and loop constants uniform buffer");
+    return false;
+  }
+  descriptor_set_layout_binding_uniform_buffer.stageFlags =
+      shader_stages_guest_vertex;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_ub_float_constants_vertex_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan descriptor set layout for the vertex shader "
+        "float constants uniform buffer");
     return false;
   }
   descriptor_set_layout_binding_uniform_buffer.stageFlags =
       VK_SHADER_STAGE_FRAGMENT_BIT;
   if (dfn.vkCreateDescriptorSetLayout(
           device, &descriptor_set_layout_create_info, nullptr,
-          &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) {
+          &descriptor_set_layout_ub_float_constants_pixel_) != VK_SUCCESS) {
     XELOGE(
-        "Failed to create a Vulkan descriptor set layout for an uniform buffer "
-        "accessible by guest pixel shaders");
-    return false;
-  }
-  descriptor_set_layout_binding_uniform_buffer.stageFlags =
-      VK_SHADER_STAGE_FRAGMENT_BIT;
-  if (dfn.vkCreateDescriptorSetLayout(
-          device, &descriptor_set_layout_create_info, nullptr,
-          &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) {
-    XELOGE(
-        "Failed to create a Vulkan descriptor set layout for an uniform buffer "
-        "accessible by guest pixel shaders");
+        "Failed to create a Vulkan descriptor set layout for the pixel shader "
+        "float constants uniform buffer");
     return false;
   }
   descriptor_set_layout_binding_uniform_buffer.stageFlags =
       shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT;
+  if (provider.device_features().tessellationShader) {
+    descriptor_set_layout_binding_uniform_buffer.stageFlags |=
+        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+  }
   if (dfn.vkCreateDescriptorSetLayout(
           device, &descriptor_set_layout_create_info, nullptr,
-          &descriptor_set_layout_uniform_buffer_guest_) != VK_SUCCESS) {
+          &descriptor_set_layout_ub_system_constants_) != VK_SUCCESS) {
     XELOGE(
-        "Failed to create a Vulkan descriptor set layout for an uniform buffer "
-        "accessible by guest shaders");
+        "Failed to create a Vulkan descriptor set layout for the system "
+        "constants uniform buffer");
+    return false;
+  }
+  uint32_t shared_memory_binding_count_log2 =
+      SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2(
+          provider.device_properties().limits.maxStorageBufferRange);
+  uint32_t shared_memory_binding_count = uint32_t(1)
+                                         << shared_memory_binding_count_log2;
+  VkDescriptorSetLayoutBinding
+      descriptor_set_layout_binding_shared_memory_and_edram[1];
+  descriptor_set_layout_binding_shared_memory_and_edram[0].binding = 0;
+  descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorType =
+      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+  descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorCount =
+      shared_memory_binding_count;
+  // TODO(Triang3l): When fullDrawIndexUint32 fallback is added, force host
+  // vertex shader access to the shared memory for the tessellation vertex
+  // shader (to retrieve tessellation factors).
+  descriptor_set_layout_binding_shared_memory_and_edram[0].stageFlags =
+      shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT;
+  descriptor_set_layout_binding_shared_memory_and_edram[0].pImmutableSamplers =
+      nullptr;
+  // TODO(Triang3l): EDRAM binding for the fragment shader interlocks case.
+  descriptor_set_layout_create_info.pBindings =
+      descriptor_set_layout_binding_shared_memory_and_edram;
+  if (dfn.vkCreateDescriptorSetLayout(
+          device, &descriptor_set_layout_create_info, nullptr,
+          &descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) {
+    XELOGE(
+        "Failed to create a Vulkan descriptor set layout for the shared memory "
+        "and the EDRAM");
     return false;
   }

@@ -148,13 +182,19 @@ void VulkanCommandProcessor::ShutdownContext() {
   ui::vulkan::util::DestroyAndNullHandle(
       dfn.vkDestroyDescriptorSetLayout, device,
-      descriptor_set_layout_uniform_buffer_guest_);
+      descriptor_set_layout_shared_memory_and_edram_);
   ui::vulkan::util::DestroyAndNullHandle(
       dfn.vkDestroyDescriptorSetLayout, device,
-      descriptor_set_layout_uniform_buffer_guest_pixel_);
+      descriptor_set_layout_ub_system_constants_);
   ui::vulkan::util::DestroyAndNullHandle(
       dfn.vkDestroyDescriptorSetLayout, device,
-      descriptor_set_layout_uniform_buffer_guest_vertex_);
+      descriptor_set_layout_ub_float_constants_pixel_);
+  ui::vulkan::util::DestroyAndNullHandle(
+      dfn.vkDestroyDescriptorSetLayout, device,
+      descriptor_set_layout_ub_float_constants_vertex_);
+  ui::vulkan::util::DestroyAndNullHandle(
+      dfn.vkDestroyDescriptorSetLayout, device,
+      descriptor_set_layout_ub_fetch_bool_loop_constants_);
   ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
                                          device, descriptor_set_layout_empty_);
@@ -794,6 +834,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
   if (provider.device_features().tessellationShader) {
     stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
   }
+  // TODO(Triang3l): Vertex to compute translation for rectangle and possibly
+  // point emulation.
   return stages;
 }

diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h
index b00cab90a..607016731 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.h
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h
@@ -162,13 +162,17 @@ class VulkanCommandProcessor : public CommandProcessor {
   std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
   VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;

-  // Common descriptor set layouts, usable by anything that may need them.
+  // Descriptor set layouts used by different shaders.
   VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
-  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_vertex_ =
+  VkDescriptorSetLayout descriptor_set_layout_ub_fetch_bool_loop_constants_ =
       VK_NULL_HANDLE;
-  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_pixel_ =
+  VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_vertex_ =
       VK_NULL_HANDLE;
-  VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_ =
+  VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_pixel_ =
+      VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_ub_system_constants_ =
+      VK_NULL_HANDLE;
+  VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
       VK_NULL_HANDLE;

   union TextureDescriptorSetLayoutKey {