From 81c817c54d080b7ad234244b90bbcd328c7f2022 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Fri, 23 Sep 2022 22:45:43 +0200 Subject: [PATCH] VideoBackends:Vulkan: Workaround slow vkCmdCopyImageToBuffer on QCom driver --- .../Core/VideoBackends/Vulkan/VKTexture.cpp | 161 +++++++++++++++++- Source/Core/VideoBackends/Vulkan/VKTexture.h | 18 +- Source/Core/VideoCommon/DriverDetails.cpp | 2 + Source/Core/VideoCommon/DriverDetails.h | 7 + 4 files changed, 179 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index 2bd06bf332..ff2f2cc2d1 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -21,6 +21,7 @@ #include "VideoBackends/Vulkan/VKStreamBuffer.h" #include "VideoBackends/Vulkan/VulkanContext.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" namespace Vulkan @@ -698,13 +699,22 @@ void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, &barrier); } -VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig& config, - std::unique_ptr buffer) - : AbstractStagingTexture(type, config), m_staging_buffer(std::move(buffer)) +VKStagingTexture::VKStagingTexture(PrivateTag, StagingTextureType type, const TextureConfig& config, + std::unique_ptr buffer, VkImage linear_image, + VkDeviceMemory linear_image_memory) + : AbstractStagingTexture(type, config), m_staging_buffer(std::move(buffer)), + m_linear_image(linear_image), m_linear_image_memory(linear_image_memory) { } -VKStagingTexture::~VKStagingTexture() = default; +VKStagingTexture::~VKStagingTexture() +{ + if (m_linear_image != VK_NULL_HANDLE) + { + g_command_buffer_mgr->DeferImageDestruction(m_linear_image); + g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_linear_image_memory); + } +} std::unique_ptr VKStagingTexture::Create(StagingTextureType type, const TextureConfig& config) @@ -739,10 +749,20 @@ std::unique_ptr VKStagingTexture::Create(StagingTextureType ty return nullptr; } + // Linear image + VkImage linear_image = VK_NULL_HANDLE; + VkDeviceMemory linear_image_device_memory = VK_NULL_HANDLE; + if (DriverDetails::HasBug(DriverDetails::BUG_SLOW_OPTIMAL_IMAGE_TO_BUFFER_COPY) && + type == StagingTextureType::Readback && config.samples == 1) + { + std::tie(linear_image, linear_image_device_memory) = CreateLinearImage(type, config); + } + std::unique_ptr staging_buffer = std::make_unique(buffer_type, buffer, memory, buffer_size, coherent); - std::unique_ptr staging_tex = std::unique_ptr( - new VKStagingTexture(type, config, std::move(staging_buffer))); + std::unique_ptr staging_tex = + std::make_unique(PrivateTag{}, type, config, std::move(staging_buffer), + linear_image, linear_image_device_memory); // Use persistent mapping. if (!staging_tex->m_staging_buffer->Map()) @@ -752,6 +772,75 @@ std::unique_ptr VKStagingTexture::Create(StagingTextureType ty return staging_tex; } +std::pair VKStagingTexture::CreateLinearImage(StagingTextureType type, + const TextureConfig& config) +{ + // Create a intermediate texture with linear tiling + VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + nullptr, + 0, + VK_IMAGE_TYPE_2D, + VKTexture::GetVkFormatForHostTextureFormat(config.format), + {config.width, config.height, 1}, + 1, + 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_TILING_LINEAR, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + VK_IMAGE_LAYOUT_UNDEFINED}; + + VkImageFormatProperties format_properties; + VkResult res = vkGetPhysicalDeviceImageFormatProperties( + g_vulkan_context->GetPhysicalDevice(), image_info.format, image_info.imageType, + image_info.tiling, image_info.usage, image_info.flags, &format_properties); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "Linear images are not supported for the staging texture: "); + return std::make_pair(VK_NULL_HANDLE, VK_NULL_HANDLE); + } + + VkImage image; + res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); + return std::make_pair(VK_NULL_HANDLE, VK_NULL_HANDLE); + } + + // Allocate memory to back this texture, we want device local memory in this case + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); + + VkMemoryAllocateInfo memory_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, + g_vulkan_context + ->GetMemoryType(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + false) + .value_or(0)}; + + VkDeviceMemory device_memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + return std::make_pair(VK_NULL_HANDLE, VK_NULL_HANDLE); + } + + res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); + return std::make_pair(VK_NULL_HANDLE, VK_NULL_HANDLE); + } + return std::make_pair(image, device_memory); +} + void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) @@ -783,7 +872,16 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, image_copy.imageOffset = {src_rect.left, src_rect.top, 0}; image_copy.imageExtent = {static_cast(src_rect.GetWidth()), static_cast(src_rect.GetHeight()), 1u}; - vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_tex->GetImage(), + + VkImage src_image = src_tex->GetImage(); + if (m_linear_image != VK_NULL_HANDLE) + { + CopyFromTextureToLinearImage(src_tex, src_rect, src_layer, src_level, dst_rect); + src_image = m_linear_image; + image_copy.imageOffset = {0, 0, 0}; + } + + vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_staging_buffer->GetBuffer(), 1, &image_copy); @@ -794,6 +892,55 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); } +void VKStagingTexture::CopyFromTextureToLinearImage(const VKTexture* src_tex, + const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) +{ + // The proprietary Qualcomm driver allocates a temporary image when copying from an image + // with optimal tiling (VK_IMAGE_TILING_OPTIMAL) to a buffer. + // That allocation is very slow, so we just do it ourself and reuse the intermediate image. + + const VkImageAspectFlags aspect = VKTexture::GetImageViewAspectForFormat(src_tex->GetFormat()); + + VkImageMemoryBarrier linear_image_barrier = {}; + linear_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + linear_image_barrier.pNext = nullptr; + linear_image_barrier.srcAccessMask = 0; + linear_image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + linear_image_barrier.image = m_linear_image; + linear_image_barrier.subresourceRange = {aspect, 0, 1, 0, 1}; + vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &linear_image_barrier); + + VkImageBlit blit; + blit.srcSubresource = {aspect, src_level, src_layer, 1}; + blit.dstSubresource.layerCount = 1; + blit.dstSubresource.baseArrayLayer = 0; + blit.dstSubresource.mipLevel = 0; + blit.dstSubresource.aspectMask = linear_image_barrier.subresourceRange.aspectMask; + blit.srcOffsets[0] = {src_rect.left, src_rect.top, 0}; + blit.srcOffsets[1] = {static_cast(blit.srcOffsets[0].x + src_rect.GetWidth()), + static_cast(blit.srcOffsets[0].y + src_rect.GetHeight()), 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {dst_rect.GetWidth(), dst_rect.GetHeight(), 1u}; + + vkCmdBlitImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_tex->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_linear_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_NEAREST); + + linear_image_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &linear_image_barrier); +} + void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.h b/Source/Core/VideoBackends/Vulkan/VKTexture.h index ce25fec74f..be74442ba2 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.h +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.h @@ -84,8 +84,16 @@ private: class VKStagingTexture final : public AbstractStagingTexture { + struct PrivateTag + { + }; + public: VKStagingTexture() = delete; + VKStagingTexture(PrivateTag, StagingTextureType type, const TextureConfig& config, + std::unique_ptr buffer, VkImage linear_image, + VkDeviceMemory linear_image_memory); + ~VKStagingTexture(); void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, @@ -102,11 +110,17 @@ public: static std::unique_ptr Create(StagingTextureType type, const TextureConfig& config); + static std::pair CreateLinearImage(StagingTextureType type, + const TextureConfig& config); + private: - VKStagingTexture(StagingTextureType type, const TextureConfig& config, - std::unique_ptr buffer); + void CopyFromTextureToLinearImage(const VKTexture* src_tex, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect); std::unique_ptr m_staging_buffer; + VkImage m_linear_image = VK_NULL_HANDLE; + VkDeviceMemory m_linear_image_memory = VK_NULL_HANDLE; u64 m_flush_fence_counter = 0; }; diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index f255460d74..09fef47a4f 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -152,6 +152,8 @@ constexpr BugInfo m_known_bugs[] = { BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true}, {API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true}, + {API_VULKAN, OS_ANDROID, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, + BUG_SLOW_OPTIMAL_IMAGE_TO_BUFFER_COPY, -1.0, -1.0, true}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index b7a7a5689f..02827c35d5 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -329,6 +329,13 @@ enum Bug // Started version: -1 // Ended version: -1 BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, + + // BUG: vkCmdCopyImageToBuffer allocates a staging image when used to copy from + // an image with optimal tiling. + // Affected devices: Adreno + // Started Version: -1 + // Ended Version: -1 + BUG_SLOW_OPTIMAL_IMAGE_TO_BUFFER_COPY }; // Initializes our internal vendor, device family, and driver version