From 05ef7a273afcb45e31152ec6c77720198c0b1693 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 28 Jun 2022 22:42:18 +0300 Subject: [PATCH] [Vulkan] Samplers (only 1.0 core features for now) --- .../gpu/vulkan/vulkan_command_processor.cc | 181 +++++++--- .../gpu/vulkan/vulkan_command_processor.h | 7 + src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 35 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 2 + src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 315 ++++++++++++++++++ src/xenia/gpu/vulkan/vulkan_texture_cache.h | 72 ++++ 6 files changed, 550 insertions(+), 62 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 29eebba8e..0a89bb122 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2174,26 +2174,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } // TODO(Triang3l): Memory export. - if (!BeginSubmission(true)) { - return false; - } - - // Process primitives. - PrimitiveProcessor::ProcessingResult primitive_processing_result; - if (!primitive_processor_->Process(primitive_processing_result)) { - return false; - } - if (!primitive_processing_result.host_draw_vertex_count) { - // Nothing to draw. - return true; - } - // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, vertex - // shader as compute. - if (primitive_processing_result.host_vertex_shader_type != - Shader::HostVertexShaderType::kVertex) { - return false; - } - reg::RB_DEPTHCONTROL normalized_depth_control = draw_util::GetNormalizedDepthControl(regs); uint32_t normalized_color_mask = @@ -2201,14 +2181,132 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, regs, pixel_shader->writes_color_targets()) : 0; - // Shader modifications. 
- SpirvShaderTranslator::Modification vertex_shader_modification = - pipeline_cache_->GetCurrentVertexShaderModification( - *vertex_shader, primitive_processing_result.host_vertex_shader_type); - SpirvShaderTranslator::Modification pixel_shader_modification = - pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification( - *pixel_shader, normalized_color_mask) - : SpirvShaderTranslator::Modification(0); + PrimitiveProcessor::ProcessingResult primitive_processing_result; + SpirvShaderTranslator::Modification vertex_shader_modification; + SpirvShaderTranslator::Modification pixel_shader_modification; + VulkanShader::VulkanTranslation* vertex_shader_translation; + VulkanShader::VulkanTranslation* pixel_shader_translation; + + // Two iterations because a submission (even the current one - in which case + // it needs to be ended, and a new one must be started) may need to be awaited + // in case of a sampler count overflow, and if that happens, all subsystem + // updates done previously must be performed again because the updates done + // before the awaiting may be referencing objects destroyed by + // CompletedSubmissionUpdated. + for (uint32_t i = 0; i < 2; ++i) { + if (!BeginSubmission(true)) { + return false; + } + + // Process primitives. + if (!primitive_processor_->Process(primitive_processing_result)) { + return false; + } + if (!primitive_processing_result.host_draw_vertex_count) { + // Nothing to draw. + return true; + } + // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, + // vertex shader as compute. + if (primitive_processing_result.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex) { + return false; + } + + // Shader modifications. + vertex_shader_modification = + pipeline_cache_->GetCurrentVertexShaderModification( + *vertex_shader, + primitive_processing_result.host_vertex_shader_type); + pixel_shader_modification = + pixel_shader ? 
pipeline_cache_->GetCurrentPixelShaderModification( + *pixel_shader, normalized_color_mask) + : SpirvShaderTranslator::Modification(0); + + // Translate the shaders now to obtain the sampler bindings. + vertex_shader_translation = static_cast( + vertex_shader->GetOrCreateTranslation( + vertex_shader_modification.value)); + pixel_shader_translation = + pixel_shader ? static_cast( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; + if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation, + pixel_shader_translation)) { + return false; + } + + // Obtain the samplers. Note that the bindings don't depend on the shader + // modification, so if on the second iteration of this loop it becomes + // different for some reason (like a race condition with the guest in index + // buffer processing in the primitive processor resulting in different host + // vertex shader types), the bindings will stay the same. + // TODO(Triang3l): Sampler caching and reuse for adjacent draws within one + // submission. + uint32_t samplers_overflowed_count = 0; + for (uint32_t j = 0; j < 2; ++j) { + std::vector>& + shader_samplers = + j ? current_samplers_pixel_ : current_samplers_vertex_; + if (!i) { + shader_samplers.clear(); + } + const VulkanShader* shader = j ? 
pixel_shader : vertex_shader; + if (!shader) { + continue; + } + const std::vector& shader_sampler_bindings = + shader->GetSamplerBindingsAfterTranslation(); + if (!i) { + shader_samplers.reserve(shader_sampler_bindings.size()); + for (const VulkanShader::SamplerBinding& shader_sampler_binding : + shader_sampler_bindings) { + shader_samplers.emplace_back( + texture_cache_->GetSamplerParameters(shader_sampler_binding), + VK_NULL_HANDLE); + } + } + for (std::pair& + shader_sampler_pair : shader_samplers) { + // UseSampler calls are needed even on the second iteration in case the + // submission was broken (and thus the last usage submission indices for + // the used samplers need to be updated) due to an overflow within one + // submission. Though sampler overflow is a very rare situation overall. + bool sampler_overflowed; + VkSampler shader_sampler = texture_cache_->UseSampler( + shader_sampler_pair.first, sampler_overflowed); + shader_sampler_pair.second = shader_sampler; + if (shader_sampler == VK_NULL_HANDLE) { + if (!sampler_overflowed || i) { + // If !sampler_overflowed, just failed to create a sampler for some + // reason. + // If i == 1, an overflow has happened twice, can't recover from it + // anymore (would enter an infinite loop otherwise if the number of + // attempts was not limited to 2). Possibly too many unique samplers + // in one draw, or failed to await submission completion. + return false; + } + ++samplers_overflowed_count; + } + } + } + if (!samplers_overflowed_count) { + break; + } + assert_zero(i); + // Free space for as many samplers as how many haven't been allocated + // successfully - obtain the submission index that needs to be awaited to + // reuse `samplers_overflowed_count` slots. This must be done after all the + // UseSampler calls, not inside the loop calling UseSampler, because earlier + // UseSampler calls may "mark for deletion" some samplers that later + // UseSampler calls in the loop may actually demand. 
+ uint64_t sampler_overflow_await_submission = + texture_cache_->GetSubmissionToAwaitOnSamplerOverflow( + samplers_overflowed_count); + assert_true(sampler_overflow_await_submission <= GetCurrentSubmission()); + CheckSubmissionFenceAndDeviceLoss(sampler_overflow_await_submission); + } // Set up the render targets - this may perform dispatches and draws. if (!render_target_cache_->Update(is_rasterization_done, @@ -2220,15 +2318,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Create the pipeline (for this, need the render pass from the render target // cache), translating the shaders - doing this now to obtain the used // textures. - VulkanShader::VulkanTranslation* vertex_shader_translation = - static_cast( - vertex_shader->GetOrCreateTranslation( - vertex_shader_modification.value)); - VulkanShader::VulkanTranslation* pixel_shader_translation = - pixel_shader ? static_cast( - pixel_shader->GetOrCreateTranslation( - pixel_shader_modification.value)) - : nullptr; VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( @@ -3532,18 +3621,15 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, (write_pixel_textures ? texture_count_pixel : 0)); size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size(); if (write_vertex_samplers) { - // TODO(Triang3l): Real samplers. 
- for (const VulkanShader::SamplerBinding& sampler_binding : - samplers_vertex) { + for (const std::pair& + sampler_pair : current_samplers_vertex_) { VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); - descriptor_image_info.sampler = provider.GetHostSampler( - ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + descriptor_image_info.sampler = sampler_pair.second; } } size_t vertex_texture_image_info_offset = descriptor_write_image_info_.size(); if (write_vertex_textures) { - // TODO(Triang3l): Real textures. for (const VulkanShader::TextureBinding& texture_binding : textures_vertex) { VkDescriptorImageInfo& descriptor_image_info = @@ -3558,18 +3644,15 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, } size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size(); if (write_pixel_samplers) { - // TODO(Triang3l): Real samplers. - for (const VulkanShader::SamplerBinding& sampler_binding : - *samplers_pixel) { + for (const std::pair& + sampler_pair : current_samplers_pixel_) { VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); - descriptor_image_info.sampler = provider.GetHostSampler( - ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + descriptor_image_info.sampler = sampler_pair.second; } } size_t pixel_texture_image_info_offset = descriptor_write_image_info_.size(); if (write_pixel_textures) { - // TODO(Triang3l): Real textures. 
for (const VulkanShader::TextureBinding& texture_binding : *textures_pixel) { VkDescriptorImageInfo& descriptor_image_info = diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index dece6e02a..cef27be57 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -158,6 +158,7 @@ class VulkanCommandProcessor : public CommandProcessor { return deferred_command_buffer_; } + bool submission_open() const { return submission_open_; } uint64_t GetCurrentSubmission() const { return submission_completed_ + uint64_t(submissions_in_flight_fences_.size()) + 1; @@ -676,6 +677,12 @@ class VulkanCommandProcessor : public CommandProcessor { bool dynamic_stencil_reference_front_update_needed_; bool dynamic_stencil_reference_back_update_needed_; + // Currently used samplers. + std::vector> + current_samplers_vertex_; + std::vector> + current_samplers_pixel_; + // Cache render pass currently started in the command buffer with the // framebuffer. 
VkRenderPass current_render_pass_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index d3049a561..406a1a444 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -158,20 +158,9 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( return modification; } -bool VulkanPipelineCache::ConfigurePipeline( +bool VulkanPipelineCache::EnsureShadersTranslated( VulkanShader::VulkanTranslation* vertex_shader, - VulkanShader::VulkanTranslation* pixel_shader, - const PrimitiveProcessor::ProcessingResult& primitive_processing_result, - reg::RB_DEPTHCONTROL normalized_depth_control, - uint32_t normalized_color_mask, - VulkanRenderTargetCache::RenderPassKey render_pass_key, - VkPipeline& pipeline_out, - const PipelineLayoutProvider*& pipeline_layout_out) { -#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - - // Ensure shaders are translated - needed now for GetCurrentStateDescription. + VulkanShader::VulkanTranslation* pixel_shader) { // Edge flags are not supported yet (because polygon primitives are not). 
assert_true(register_file_.Get().vs_export_mode != xenos::VertexShaderExportMode::kPosition2VectorsEdge && @@ -202,6 +191,26 @@ bool VulkanPipelineCache::ConfigurePipeline( return false; } } + return true; +} + +bool VulkanPipelineCache::ConfigurePipeline( + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + reg::RB_DEPTHCONTROL normalized_depth_control, + uint32_t normalized_color_mask, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + // Ensure shaders are translated - needed now for GetCurrentStateDescription. + if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) { + return false; + } PipelineDescription description; if (!GetCurrentStateDescription( diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 819bd6e16..141d756c8 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -74,6 +74,8 @@ class VulkanPipelineCache { SpirvShaderTranslator::Modification GetCurrentPixelShaderModification( const Shader& shader, uint32_t normalized_color_mask) const; + bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader); // TODO(Triang3l): Return a deferred creation handle. 
bool ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index ac7543330..51c1d1b09 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -18,6 +18,7 @@ #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/gpu/texture_info.h" +#include "xenia/gpu/texture_util.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -425,6 +426,15 @@ VulkanTextureCache::~VulkanTextureCache() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + for (const std::pair& sampler_pair : + samplers_) { + dfn.vkDestroySampler(device, sampler_pair.second.sampler, nullptr); + } + samplers_.clear(); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", 0); + sampler_used_last_ = nullptr; + sampler_used_first_ = nullptr; + if (null_image_view_3d_ != VK_NULL_HANDLE) { dfn.vkDestroyImageView(device, null_image_view_3d_, nullptr); } @@ -589,6 +599,266 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( } } +VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( + const VulkanShader::SamplerBinding& binding) const { + const auto& regs = register_file(); + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + + SamplerParameters parameters; + + xenos::ClampMode fetch_clamp_x, fetch_clamp_y, fetch_clamp_z; + texture_util::GetClampModesForDimension(fetch, fetch_clamp_x, fetch_clamp_y, + fetch_clamp_z); + parameters.clamp_x = NormalizeClampMode(fetch_clamp_x); + parameters.clamp_y = NormalizeClampMode(fetch_clamp_y); + parameters.clamp_z = NormalizeClampMode(fetch_clamp_z); + if (xenos::ClampModeUsesBorder(parameters.clamp_x) || + 
xenos::ClampModeUsesBorder(parameters.clamp_y) || + xenos::ClampModeUsesBorder(parameters.clamp_z)) { + parameters.border_color = fetch.border_color; + } else { + parameters.border_color = xenos::BorderColor::k_ABGR_Black; + } + + xenos::TextureFilter mag_filter = + binding.mag_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.mag_filter + : binding.mag_filter; + parameters.mag_linear = mag_filter == xenos::TextureFilter::kLinear; + xenos::TextureFilter min_filter = + binding.min_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.min_filter + : binding.min_filter; + parameters.min_linear = min_filter == xenos::TextureFilter::kLinear; + xenos::TextureFilter mip_filter = + binding.mip_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.mip_filter + : binding.mip_filter; + parameters.mip_linear = mip_filter == xenos::TextureFilter::kLinear; + if (parameters.mag_linear || parameters.min_linear || parameters.mip_linear) { + // Check if the texture is actually filterable on the host. + bool linear_filterable = true; + TextureKey texture_key; + uint8_t texture_swizzled_signs; + BindingInfoFromFetchConstant(fetch, texture_key, &texture_swizzled_signs); + if (texture_key.is_valid) { + const HostFormatPair& host_format_pair = GetHostFormatPair(texture_key); + if ((texture_util::IsAnySignNotSigned(texture_swizzled_signs) && + !host_format_pair.format_unsigned.linear_filterable) || + (texture_util::IsAnySignSigned(texture_swizzled_signs) && + !host_format_pair.format_signed.linear_filterable)) { + linear_filterable = false; + } + } else { + linear_filterable = false; + } + if (!linear_filterable) { + parameters.mag_linear = 0; + parameters.min_linear = 0; + parameters.mip_linear = 0; + } + } + xenos::AnisoFilter aniso_filter = + binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst + ? 
fetch.aniso_filter + : binding.aniso_filter; + parameters.aniso_filter = std::min(aniso_filter, max_anisotropy_); + parameters.mip_base_map = mip_filter == xenos::TextureFilter::kBaseMap; + + uint32_t mip_min_level; + texture_util::GetSubresourcesFromFetchConstant(fetch, nullptr, nullptr, + nullptr, nullptr, nullptr, + &mip_min_level, nullptr); + parameters.mip_min_level = mip_min_level; + + return parameters; +} + +VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters, + bool& has_overflown_out) { + assert_true(command_processor_.submission_open()); + uint64_t submission_current = command_processor_.GetCurrentSubmission(); + + // Try to find an existing sampler. + auto it_existing = samplers_.find(parameters); + if (it_existing != samplers_.end()) { + std::pair& sampler = *it_existing; + assert_true(sampler.second.last_usage_submission <= submission_current); + // This is called very frequently, don't relink unless needed for caching. + if (sampler.second.last_usage_submission < submission_current) { + // Move to the front of the LRU queue. + sampler.second.last_usage_submission = submission_current; + if (sampler.second.used_next) { + if (sampler.second.used_previous) { + sampler.second.used_previous->second.used_next = + sampler.second.used_next; + } else { + sampler_used_first_ = sampler.second.used_next; + } + sampler.second.used_next->second.used_previous = + sampler.second.used_previous; + sampler.second.used_previous = sampler_used_last_; + sampler.second.used_next = nullptr; + sampler_used_last_->second.used_next = &sampler; + sampler_used_last_ = &sampler; + } + } + has_overflown_out = false; + return sampler.second.sampler; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // See if an existing sampler can be destroyed to create space for the new + // one. 
+ if (samplers_.size() >= sampler_max_count_) { + assert_not_null(sampler_used_first_); + if (!sampler_used_first_) { + has_overflown_out = false; + return VK_NULL_HANDLE; + } + if (sampler_used_first_->second.last_usage_submission > + command_processor_.GetCompletedSubmission()) { + has_overflown_out = true; + return VK_NULL_HANDLE; + } + auto it_reuse = samplers_.find(sampler_used_first_->first); + dfn.vkDestroySampler(device, sampler_used_first_->second.sampler, nullptr); + if (sampler_used_first_->second.used_next) { + sampler_used_first_->second.used_next->second.used_previous = + sampler_used_first_->second.used_previous; + } else { + sampler_used_last_ = sampler_used_first_->second.used_previous; + } + sampler_used_first_ = sampler_used_first_->second.used_next; + assert_true(it_reuse != samplers_.end()); + if (it_reuse != samplers_.end()) { + // This destroys the Sampler object. + samplers_.erase(it_reuse); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size()); + } else { + has_overflown_out = false; + return VK_NULL_HANDLE; + } + } + + // Create a new sampler and make it the most recently used. + // The values are normalized, and unsupported ones are excluded, in + // GetSamplerParameters. + VkSamplerCreateInfo sampler_create_info = {}; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + // TODO(Triang3l): VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT if + // VK_EXT_non_seamless_cube_map and the nonSeamlessCubeMap feature are + // supported. + sampler_create_info.magFilter = + parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + sampler_create_info.minFilter = + parameters.min_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = parameters.mip_linear + ? 
VK_SAMPLER_MIPMAP_MODE_LINEAR + : VK_SAMPLER_MIPMAP_MODE_NEAREST; + static const VkSamplerAddressMode kAddressModeMap[] = { + // kRepeat + VK_SAMPLER_ADDRESS_MODE_REPEAT, + // kMirroredRepeat + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + // kClampToEdge + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + // kMirrorClampToEdge + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + // kClampToHalfway + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + // kMirrorClampToHalfway + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + // kClampToBorder + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + // kMirrorClampToBorder + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + }; + sampler_create_info.addressModeU = + kAddressModeMap[uint32_t(parameters.clamp_x)]; + sampler_create_info.addressModeV = + kAddressModeMap[uint32_t(parameters.clamp_y)]; + sampler_create_info.addressModeW = + kAddressModeMap[uint32_t(parameters.clamp_z)]; + // LOD biasing is performed in shaders. + if (parameters.aniso_filter != xenos::AnisoFilter::kDisabled) { + sampler_create_info.anisotropyEnable = VK_TRUE; + sampler_create_info.maxAnisotropy = + float(UINT32_C(1) << (uint32_t(parameters.aniso_filter) - + uint32_t(xenos::AnisoFilter::kMax_1_1))); + } + sampler_create_info.minLod = float(parameters.mip_min_level); + if (parameters.mip_base_map) { + assert_false(parameters.mip_linear); + sampler_create_info.maxLod = sampler_create_info.minLod + 0.25f; + } else { + sampler_create_info.maxLod = VK_LOD_CLAMP_NONE; + } + // TODO(Triang3l): Custom border colors for CrYCb / YCrCb. 
+ switch (parameters.border_color) { + case xenos::BorderColor::k_ABGR_White: + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + break; + default: + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + break; + } + VkSampler vulkan_sampler; + if (dfn.vkCreateSampler(device, &sampler_create_info, nullptr, + &vulkan_sampler) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create the sampler for parameters " + "0x{:08X}", + parameters.value); + has_overflown_out = false; + return VK_NULL_HANDLE; + } + std::pair& new_sampler = + *(samplers_ + .emplace(std::piecewise_construct, + std::forward_as_tuple(parameters), std::forward_as_tuple()) + .first); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size()); + new_sampler.second.sampler = vulkan_sampler; + new_sampler.second.last_usage_submission = submission_current; + new_sampler.second.used_previous = sampler_used_last_; + new_sampler.second.used_next = nullptr; + if (sampler_used_last_) { + sampler_used_last_->second.used_next = &new_sampler; + } else { + sampler_used_first_ = &new_sampler; + } + sampler_used_last_ = &new_sampler; + return vulkan_sampler; +} + +uint64_t VulkanTextureCache::GetSubmissionToAwaitOnSamplerOverflow( + uint32_t overflowed_sampler_count) const { + if (!overflowed_sampler_count) { + return 0; + } + std::pair* sampler_used = + sampler_used_first_; + if (!sampler_used_first_) { + return 0; + } + for (uint32_t samplers_remaining = overflowed_sampler_count - 1; + samplers_remaining; --samplers_remaining) { + std::pair* sampler_used_next = + sampler_used->second.used_next; + if (!sampler_used_next) { + break; + } + sampler_used = sampler_used_next; + } + return sampler_used->second.last_usage_submission; +} + VkImageView VulkanTextureCache::RequestSwapTexture( uint32_t& width_scaled_out, uint32_t& height_scaled_out, xenos::TextureFormat& format_out) { @@ -2278,6 +2548,32 @@ bool VulkanTextureCache::Initialize() { 
null_images_cleared_ = false; + // Samplers. + + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + + // Some MoltenVK devices have a maximum of 2048, 1024, or even 96 samplers, + // below Vulkan's minimum requirement of 4000. + // Assuming that the current VulkanTextureCache is the only one on this + // VkDevice (true in a regular emulation scenario), so taking over all the + // allocation slots exclusively. + // Also leaving a few slots for use by things like overlay applications. + sampler_max_count_ = + device_limits.maxSamplerAllocationCount - + uint32_t(ui::vulkan::VulkanProvider::HostSampler::kCount) - 16; + + if (device_features.samplerAnisotropy) { + max_anisotropy_ = xenos::AnisoFilter( + uint32_t(xenos::AnisoFilter::kMax_1_1) + + (31 - + xe::lzcnt(uint32_t(std::min( + 16.0f, std::max(1.0f, device_limits.maxSamplerAnisotropy)))))); + } else { + max_anisotropy_ = xenos::AnisoFilter::kDisabled; + } + return true; } @@ -2325,6 +2621,25 @@ void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, } } +xenos::ClampMode VulkanTextureCache::NormalizeClampMode( + xenos::ClampMode clamp_mode) const { + if (clamp_mode == xenos::ClampMode::kClampToHalfway) { + // No GL_CLAMP (clamp to half edge, half border) equivalent in Vulkan, but + // there's no Direct3D 9 equivalent anyway, and too weird to be suitable for + // intentional real usage. + return xenos::ClampMode::kClampToEdge; + } + if (clamp_mode == xenos::ClampMode::kMirrorClampToEdge || + clamp_mode == xenos::ClampMode::kMirrorClampToHalfway || + clamp_mode == xenos::ClampMode::kMirrorClampToBorder) { + // TODO(Triang3l): VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR if + // VK_KHR_sampler_mirror_clamp_to_edge (or Vulkan 1.2) and the + // samplerMirrorClampToEdge feature are supported. 
+ return xenos::ClampMode::kMirroredRepeat; + } + return clamp_mode; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 423131c55..79e5db493 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -17,6 +17,7 @@ #include "xenia/base/hash.h" #include "xenia/gpu/texture_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -28,6 +29,39 @@ class VulkanCommandProcessor; class VulkanTextureCache final : public TextureCache { public: + // Sampler parameters that can be directly converted to a host sampler or used + // for checking whether samplers bindings are up to date. + union SamplerParameters { + uint32_t value; + struct { + xenos::ClampMode clamp_x : 3; // 3 + xenos::ClampMode clamp_y : 3; // 6 + xenos::ClampMode clamp_z : 3; // 9 + xenos::BorderColor border_color : 2; // 11 + uint32_t mag_linear : 1; // 12 + uint32_t min_linear : 1; // 13 + uint32_t mip_linear : 1; // 14 + xenos::AnisoFilter aniso_filter : 3; // 17 + uint32_t mip_min_level : 4; // 21 + uint32_t mip_base_map : 1; // 22 + // Maximum mip level is in the texture resource itself, but mip_base_map + // can be used to limit fetching to mip_min_level. + }; + + SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); } + struct Hasher { + size_t operator()(const SamplerParameters& parameters) const { + return std::hash{}(parameters.value); + } + }; + bool operator==(const SamplerParameters& parameters) const { + return value == parameters.value; + } + bool operator!=(const SamplerParameters& parameters) const { + return value != parameters.value; + } + }; + // Transient descriptor set layouts must be initialized in the command // processor. 
static std::unique_ptr Create( @@ -60,6 +94,26 @@ class VulkanTextureCache final : public TextureCache { xenos::FetchOpDimension dimension, bool is_signed) const; + SamplerParameters GetSamplerParameters( + const VulkanShader::SamplerBinding& binding) const; + + // Must be called for every used sampler at least once in a single submission, + // and a submission must be open for this to be callable. + // Returns: + // - The sampler, if obtained successfully - and increases its last usage + // submission index - and has_overflown_out = false. + // - VK_NULL_HANDLE and has_overflown_out = true if there's a total sampler + // count overflow in a submission that potentially hasn't completed yet. + // - VK_NULL_HANDLE and has_overflown_out = false in case of a general failure + // to create a sampler. + VkSampler UseSampler(SamplerParameters parameters, bool& has_overflown_out); + // Returns the submission index to await (may be the current submission in + // case of an overflow within a single submission - in this case, it must be + // ended, and a new one must be started) in case of sampler count overflow, so + // samplers may be freed, and UseSamplers may take their slots. + uint64_t GetSubmissionToAwaitOnSamplerOverflow( + uint32_t overflowed_sampler_count) const; + // Returns the 2D view of the front buffer texture (for fragment shader // reading - the barrier will be pushed in the command processor if needed), // or VK_NULL_HANDLE in case of failure. May call LoadTextureData. 
@@ -220,6 +274,13 @@ class VulkanTextureCache final : public TextureCache { } }; + struct Sampler { + VkSampler sampler; + uint64_t last_usage_submission; + std::pair* used_previous; + std::pair* used_next; + }; + static constexpr bool AreDimensionsCompatible( xenos::FetchOpDimension binding_dimension, xenos::DataDimension resource_dimension) { @@ -251,6 +312,8 @@ class VulkanTextureCache final : public TextureCache { VkPipelineStageFlags& stage_mask, VkAccessFlags& access_mask, VkImageLayout& layout); + xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const; + VulkanCommandProcessor& command_processor_; VkPipelineStageFlags guest_shader_pipeline_stages_; @@ -275,6 +338,15 @@ class VulkanTextureCache final : public TextureCache { std::array vulkan_texture_bindings_; + + uint32_t sampler_max_count_; + + xenos::AnisoFilter max_anisotropy_; + + std::unordered_map + samplers_; + std::pair* sampler_used_first_ = nullptr; + std::pair* sampler_used_last_ = nullptr; }; } // namespace vulkan