From e4de8663c4c85d148f58fe47538b98757190f90e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 7 Jul 2022 21:05:56 +0300 Subject: [PATCH] [Vulkan] All guest draw uniform buffer bindings in a single descriptor set Reduce the number of bound descriptor sets from 10 to 6, which is still above the minimum limit of 4, but closer --- src/xenia/gpu/spirv_shader_translator.cc | 25 +- src/xenia/gpu/spirv_shader_translator.h | 24 +- .../gpu/vulkan/vulkan_command_processor.cc | 503 +++++++++--------- .../gpu/vulkan/vulkan_command_processor.h | 14 +- 4 files changed, 291 insertions(+), 275 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index cfbbd28e4..222b23b48 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -257,8 +257,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_system_constants"); builder_->addDecoration(uniform_system_constants_, spv::DecorationDescriptorSet, - kDescriptorSetSystemConstants); - builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0); + int(kDescriptorSetConstants)); + builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, + int(kConstantBufferSystem)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_system_constants_); } @@ -285,12 +286,13 @@ void SpirvShaderTranslator::StartTranslation() { uniform_float_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_float_constants, "xe_uniform_float_constants"); + builder_->addDecoration(uniform_float_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetConstants)); builder_->addDecoration( - uniform_float_constants_, spv::DecorationDescriptorSet, - int(is_pixel_shader() ? 
kDescriptorSetFloatConstantsPixel - : kDescriptorSetFloatConstantsVertex)); - builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, - 0); + uniform_float_constants_, spv::DecorationBinding, + int(is_pixel_shader() ? kConstantBufferFloatPixel + : kConstantBufferFloatVertex)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_float_constants_); } @@ -326,9 +328,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_bool_loop_constants"); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationDescriptorSet, - int(kDescriptorSetBoolLoopConstants)); + int(kDescriptorSetConstants)); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, - 0); + int(kConstantBufferBoolLoop)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_bool_loop_constants_); } @@ -352,8 +354,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_fetch_constants"); builder_->addDecoration(uniform_fetch_constants_, spv::DecorationDescriptorSet, - int(kDescriptorSetFetchConstants)); - builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0); + int(kDescriptorSetConstants)); + builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, + int(kConstantBufferFetch)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_fetch_constants_); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index aca23efe5..884649d58 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -131,6 +131,16 @@ class SpirvShaderTranslator : public ShaderTranslator { float color_exp_bias[4]; }; + enum ConstantBuffer : uint32_t { + kConstantBufferSystem, + kConstantBufferFloatVertex, + kConstantBufferFloatPixel, + kConstantBufferBoolLoop, + kConstantBufferFetch, + + kConstantBufferCount, + }; + // The minimum limit for 
maxPerStageDescriptorStorageBuffers is 4, and for // maxStorageBufferRange it's 128 MB. These are the values of those limits on // Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound, @@ -159,18 +169,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // Never changed. kDescriptorSetSharedMemoryAndEdram, - // Pretty rarely used and rarely changed - flow control constants. - kDescriptorSetBoolLoopConstants, - // May stay the same across many draws. - kDescriptorSetSystemConstants, - // Less frequently changed (per-material). - kDescriptorSetFloatConstantsPixel, - // Quite frequently changed (for one object drawn multiple times, for - // instance - may contain projection matrices). - kDescriptorSetFloatConstantsVertex, - // Very frequently changed, especially for UI draws, and for models drawn in - // multiple parts - contains vertex and texture fetch constants. - kDescriptorSetFetchConstants, + // Changed in case of changes in the data. + kDescriptorSetConstants, // Mutable part of the pipeline layout: kDescriptorSetMutableLayoutsStart, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index aa9f2e4ee..c6eb8bb54 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -59,7 +59,8 @@ VulkanCommandProcessor::VulkanCommandProcessor( transient_descriptor_allocator_uniform_buffer_( *static_cast( graphics_system->provider()), - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768), + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + SpirvShaderTranslator::kConstantBufferCount * 32768, 32768), transient_descriptor_allocator_storage_buffer_( *static_cast( graphics_system->provider()), @@ -176,84 +177,61 @@ bool VulkanCommandProcessor::SetupContext() { "and the EDRAM"); return false; } - // Transient: uniform buffer for the guest vertex shader stages. + // Guest draw constants. 
+ VkDescriptorSetLayoutBinding descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferCount] = {}; + for (uint32_t i = 0; i < SpirvShaderTranslator::kConstantBufferCount; ++i) { + VkDescriptorSetLayoutBinding& constants_binding = + descriptor_set_layout_bindings_constants[i]; + constants_binding.binding = i; + constants_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + constants_binding.descriptorCount = 1; + constants_binding.pImmutableSamplers = nullptr; + } + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferSystem] + .stageFlags = + guest_shader_stages | + (device_features.tessellationShader + ? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT + : 0) | + (device_features.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0); + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFloatVertex] + .stageFlags = guest_shader_vertex_stages_; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFloatPixel] + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferBoolLoop] + .stageFlags = guest_shader_stages; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFetch] + .stageFlags = guest_shader_stages; + descriptor_set_layout_create_info.bindingCount = + uint32_t(xe::countof(descriptor_set_layout_bindings_constants)); + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_bindings_constants; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_constants_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for guest draw " + "constant buffers"); + return false; + } + // Transient: uniform buffer for compute shaders. 
VkDescriptorSetLayoutBinding descriptor_set_layout_binding_transient; descriptor_set_layout_binding_transient.binding = 0; descriptor_set_layout_binding_transient.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptor_set_layout_binding_transient.descriptorCount = 1; descriptor_set_layout_binding_transient.stageFlags = - guest_shader_vertex_stages_; + VK_SHADER_STAGE_COMPUTE_BIT; descriptor_set_layout_binding_transient.pImmutableSamplers = nullptr; descriptor_set_layout_create_info.bindingCount = 1; descriptor_set_layout_create_info.pBindings = &descriptor_set_layout_binding_transient; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferGuestVertex)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the guest vertex shader stages"); - return false; - } - // Transient: uniform buffer for fragment shaders. - descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = - VK_SHADER_STAGE_FRAGMENT_BIT; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferFragment)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the fragment shader"); - return false; - } - // Transient: uniform buffer for the guest shader stages. 
- descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferGuestShader)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the guest shader stages"); - return false; - } - // Transient: system constants. - descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; - if (device_features.tessellationShader) { - descriptor_set_layout_binding_transient.stageFlags |= - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - } - if (device_features.geometryShader) { - descriptor_set_layout_binding_transient.stageFlags |= - VK_SHADER_STAGE_GEOMETRY_BIT; - } - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout :: - kUniformBufferSystemConstants)]) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for the system " - "constants uniform buffer"); - return false; - } - // Transient: uniform buffer for compute shaders. 
- descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = - VK_SHADER_STAGE_COMPUTE_BIT; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, &descriptor_set_layouts_single_transient_[size_t( @@ -1052,6 +1030,9 @@ void VulkanCommandProcessor::ShutdownContext() { dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_single_transient); } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_constants_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_shared_memory_and_edram_); @@ -1134,27 +1115,25 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { float_constant_index -= 256; if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel); } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex); } } } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { - current_graphics_descriptor_set_values_up_to_date_ &= ~( - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop); } else if (index >= 
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch); if (texture_cache_) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); @@ -1890,24 +1869,8 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = descriptor_set_layout_shared_memory_and_edram_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestShader); - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferSystemConstants); - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferFragment); - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestVertex); - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestShader); + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetConstants] = + descriptor_set_layout_constants_; // Mutable layouts. 
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersVertex] = descriptor_set_layout_samplers_vertex; @@ -2774,6 +2737,7 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { sizeof(current_float_constant_map_pixel_)); std::memset(current_graphics_descriptor_sets_, 0, sizeof(current_graphics_descriptor_sets_)); + current_constant_buffers_up_to_date_ = 0; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = shared_memory_and_edram_descriptor_set_; @@ -2797,6 +2761,16 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { .push_back(used_transient_descriptor.set); single_transient_descriptors_used_.pop_front(); } + while (!constants_transient_descriptors_used_.empty()) { + const std::pair& used_transient_descriptor = + constants_transient_descriptors_used_.front(); + if (used_transient_descriptor.first > frame_completed_) { + break; + } + constants_transient_descriptors_free_.push_back( + used_transient_descriptor.second); + constants_transient_descriptors_used_.pop_front(); + } while (!texture_transient_descriptor_sets_used_.empty()) { const UsedTextureTransientDescriptorSet& used_transient_descriptor_set = texture_transient_descriptor_sets_used_.front(); @@ -3092,6 +3066,8 @@ void VulkanCommandProcessor::ClearTransientDescriptorPools() { transient_descriptor_allocator_sampler_.Reset(); transient_descriptor_allocator_sampled_image_.Reset(); + constants_transient_descriptors_free_.clear(); + constants_transient_descriptors_used_.clear(); for (std::vector& transient_descriptors_free : single_transient_descriptors_free_) { transient_descriptors_free.clear(); @@ -3520,8 +3496,8 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } if (dirty) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << 
SpirvShaderTranslator::kConstantBufferSystem); } } @@ -3537,7 +3513,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - // Invalidate descriptors for changed data. + // Invalidate constant buffers and descriptors for changed data. // Float constants. // These are the constant base addresses/ranges for shaders. @@ -3559,10 +3535,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // If no float constants at all, any buffer can be reused for them, so not // invalidating. if (float_constant_count_vertex) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~( - UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex); } } } @@ -3577,9 +3551,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; if (float_constant_count_pixel) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel); } } } @@ -3588,6 +3561,141 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, sizeof(current_float_constant_map_pixel_)); } + // Write the new constant buffers. 
+ constexpr uint32_t kAllConstantBuffersMask = + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferCount) - 1; + assert_zero(current_constant_buffers_up_to_date_ & ~kAllConstantBuffersMask); + if ((current_constant_buffers_up_to_date_ & kAllConstantBuffersMask) != + kAllConstantBuffersMask) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants); + size_t uniform_buffer_alignment = size_t( + provider.device_properties().limits.minUniformBufferOffsetAlignment); + // System constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferSystem]; + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, sizeof(SpirvShaderTranslator::SystemConstants), + uniform_buffer_alignment, buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = sizeof(SpirvShaderTranslator::SystemConstants); + std::memcpy(mapping, &system_constants_, + sizeof(SpirvShaderTranslator::SystemConstants)); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem; + } + // Vertex shader float constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFloatVertex]; + // Even if the shader doesn't need any float constants, a valid binding + // must still be provided (the pipeline layout always has float constants, + // for both the vertex shader and the pixel shader), so if the first draw + // in the frame doesn't have float constants at all, still allocate a + // dummy buffer. 
+ size_t float_constants_size = + sizeof(float) * 4 * + std::max(float_constant_count_vertex, UINT32_C(1)); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, float_constants_size, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(float_constants_size); + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = + current_float_constant_map_vertex_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping, + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping += sizeof(float) * 4; + } + } + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex; + } + // Pixel shader float constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFloatPixel]; + size_t float_constants_size = + sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, float_constants_size, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(float_constants_size); + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = + current_float_constant_map_pixel_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping, + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + + (float_constant_index << 2)] + .f32, 
+ sizeof(float) * 4); + mapping += sizeof(float) * 4; + } + } + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel; + } + // Bool and loop constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferBoolLoop]; + constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, kBoolLoopConstantsSize, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); + std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + kBoolLoopConstantsSize); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; + } + // Fetch constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFetch]; + constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, kFetchConstantsSize, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(kFetchConstantsSize); + std::memcpy(mapping, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + kFetchConstantsSize); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; + } + } + // Textures and samplers. 
const std::vector& samplers_vertex = vertex_shader->GetSamplerBindingsAfterTranslation(); @@ -3694,158 +3802,57 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Write the new descriptor sets. - VkWriteDescriptorSet - write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount]; + // Consecutive bindings updated via a single VkWriteDescriptorSet must have + // identical stage flags, but for the constants they vary. + std::array + write_descriptor_sets; uint32_t write_descriptor_set_count = 0; uint32_t write_descriptor_set_bits = 0; assert_not_zero( current_graphics_descriptor_set_values_up_to_date_ & (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram)); - // Bool and loop constants. - VkDescriptorBufferInfo buffer_info_bool_loop_constants; + // Constant buffers. if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) { - VkWriteDescriptorSet& write_bool_loop_constants = - write_descriptor_sets[write_descriptor_set_count++]; - constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); - uint8_t* mapping_bool_loop_constants = WriteTransientUniformBufferBinding( - kBoolLoopConstantsSize, - SingleTransientDescriptorLayout::kUniformBufferGuestShader, - buffer_info_bool_loop_constants, write_bool_loop_constants); - if (!mapping_bool_loop_constants) { - return false; - } - std::memcpy(mapping_bool_loop_constants, - &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - kBoolLoopConstantsSize); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - write_bool_loop_constants.dstSet; - } - // System constants. 
- VkDescriptorBufferInfo buffer_info_system_constants; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetSystemConstants))) { - VkWriteDescriptorSet& write_system_constants = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_system_constants = WriteTransientUniformBufferBinding( - sizeof(SpirvShaderTranslator::SystemConstants), - SingleTransientDescriptorLayout::kUniformBufferSystemConstants, - buffer_info_system_constants, write_system_constants); - if (!mapping_system_constants) { - return false; - } - std::memcpy(mapping_system_constants, &system_constants_, - sizeof(SpirvShaderTranslator::SystemConstants)); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetSystemConstants] = - write_system_constants.dstSet; - } - // Pixel shader float constants. - VkDescriptorBufferInfo buffer_info_float_constant_pixel; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) { - // Even if the shader doesn't need any float constants, a valid binding must - // still be provided (the pipeline layout always has float constants, for - // both the vertex shader and the pixel shader), so if the first draw in the - // frame doesn't have float constants at all, still allocate an empty - // buffer. 
- VkWriteDescriptorSet& write_float_constants_pixel = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_pixel = WriteTransientUniformBufferBinding( - sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)), - SingleTransientDescriptorLayout::kUniformBufferFragment, - buffer_info_float_constant_pixel, write_float_constants_pixel); - if (!mapping_float_constants_pixel) { - return false; - } - for (uint32_t i = 0; i < 4; ++i) { - uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i]; - uint32_t float_constant_index; - while (xe::bit_scan_forward(float_constant_map_entry, - &float_constant_index)) { - float_constant_map_entry &= ~(1ull << float_constant_index); - std::memcpy(mapping_float_constants_pixel, - &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, - sizeof(float) * 4); - mapping_float_constants_pixel += sizeof(float) * 4; + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants))) { + VkDescriptorSet constants_descriptor_set; + if (!constants_transient_descriptors_free_.empty()) { + constants_descriptor_set = constants_transient_descriptors_free_.back(); + constants_transient_descriptors_free_.pop_back(); + } else { + constants_descriptor_set = + transient_descriptor_allocator_uniform_buffer_.Allocate( + descriptor_set_layout_constants_, + SpirvShaderTranslator::kConstantBufferCount); + if (constants_descriptor_set == VK_NULL_HANDLE) { + return false; } } - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - write_float_constants_pixel.dstSet; - } - // Vertex shader float constants. 
- VkDescriptorBufferInfo buffer_info_float_constant_vertex; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) { - VkWriteDescriptorSet& write_float_constants_vertex = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_vertex = - WriteTransientUniformBufferBinding( - sizeof(float) * 4 * - std::max(float_constant_count_vertex, UINT32_C(1)), - SingleTransientDescriptorLayout::kUniformBufferGuestVertex, - buffer_info_float_constant_vertex, write_float_constants_vertex); - if (!mapping_float_constants_vertex) { - return false; - } - for (uint32_t i = 0; i < 4; ++i) { - uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i]; - uint32_t float_constant_index; - while (xe::bit_scan_forward(float_constant_map_entry, - &float_constant_index)) { - float_constant_map_entry &= ~(1ull << float_constant_index); - std::memcpy(mapping_float_constants_vertex, - &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, - sizeof(float) * 4); - mapping_float_constants_vertex += sizeof(float) * 4; - } + constants_transient_descriptors_used_.emplace_back( + frame_current_, constants_descriptor_set); + // Consecutive bindings updated via a single VkWriteDescriptorSet must have + // identical stage flags, but for the constants they vary. 
+ for (uint32_t i = 0; i < SpirvShaderTranslator::kConstantBufferCount; ++i) { + VkWriteDescriptorSet& write_constants = + write_descriptor_sets[write_descriptor_set_count++]; + write_constants.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_constants.pNext = nullptr; + write_constants.dstSet = constants_descriptor_set; + write_constants.dstBinding = i; + write_constants.dstArrayElement = 0; + write_constants.descriptorCount = 1; + write_constants.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + write_constants.pImageInfo = nullptr; + write_constants.pBufferInfo = &current_constant_buffer_infos_[i]; + write_constants.pTexelBufferView = nullptr; } write_descriptor_set_bits |= - UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants; current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - write_float_constants_vertex.dstSet; - } - // Fetch constants. - VkDescriptorBufferInfo buffer_info_fetch_constants; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { - VkWriteDescriptorSet& write_fetch_constants = - write_descriptor_sets[write_descriptor_set_count++]; - constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; - uint8_t* mapping_fetch_constants = WriteTransientUniformBufferBinding( - kFetchConstantsSize, - SingleTransientDescriptorLayout::kUniformBufferGuestShader, - buffer_info_fetch_constants, write_fetch_constants); - if (!mapping_fetch_constants) { - return false; - } - std::memcpy(mapping_fetch_constants, - &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, - kFetchConstantsSize); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFetchConstants] = - write_fetch_constants.dstSet; + 
[SpirvShaderTranslator::kDescriptorSetConstants] = + constants_descriptor_set; } // Vertex shader samplers. if (write_vertex_samplers) { @@ -3926,7 +3933,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Write. if (write_descriptor_set_count) { dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count, - write_descriptor_sets, 0, nullptr); + write_descriptor_sets.data(), 0, nullptr); } // Only make valid if all descriptor sets have been allocated and written // successfully. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 1186310f2..3581ae8d7 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -49,10 +49,6 @@ class VulkanCommandProcessor : public CommandProcessor { public: // Single-descriptor layouts for use within a single frame. enum class SingleTransientDescriptorLayout { - kUniformBufferGuestVertex, - kUniformBufferFragment, - kUniformBufferGuestShader, - kUniformBufferSystemConstants, kUniformBufferCompute, kStorageBufferCompute, kCount, @@ -530,6 +526,7 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE; std::array descriptor_set_layouts_single_transient_{}; @@ -551,6 +548,10 @@ class VulkanCommandProcessor : public CommandProcessor { std::array, size_t(SingleTransientDescriptorLayout::kCount)> single_transient_descriptors_free_; + // . 
+ std::deque> + constants_transient_descriptors_used_; + std::vector constants_transient_descriptors_free_; ui::vulkan::SingleTypeDescriptorSetAllocator transient_descriptor_allocator_sampled_image_; @@ -701,6 +702,11 @@ class VulkanCommandProcessor : public CommandProcessor { // Pipeline layout of the current guest graphics pipeline. const PipelineLayout* current_guest_graphics_pipeline_layout_; + VkDescriptorBufferInfo current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferCount]; + // Whether up-to-date data has been written to constant (uniform) buffers, and + // the buffer infos in current_constant_buffer_infos_ point to them. + uint32_t current_constant_buffers_up_to_date_; VkDescriptorSet current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetCount]; // Whether descriptor sets in current_graphics_descriptor_sets_ point to