From 89ac9294bf43367f352bfc8287cc58ad0927da26 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 7 Oct 2020 23:19:30 +0300
Subject: [PATCH] [Vulkan] Shared memory trace download

---
 .../gpu/vulkan/deferred_command_buffer.h      |  12 ++-
 .../gpu/vulkan/vulkan_command_processor.cc    |  13 ++-
 src/xenia/gpu/vulkan/vulkan_shared_memory.cc  | 101 ++++++++++++++++++
 src/xenia/gpu/vulkan/vulkan_shared_memory.h   |   9 ++
 .../ui/vulkan/vulkan_immediate_drawer.cc      |  87 ++-------------
 src/xenia/ui/vulkan/vulkan_util.cc            |  90 ++++++++++++++++
 src/xenia/ui/vulkan/vulkan_util.h             |  16 +++
 7 files changed, 243 insertions(+), 85 deletions(-)

diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h
index 476abe605..879c92d5a 100644
--- a/src/xenia/gpu/vulkan/deferred_command_buffer.h
+++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h
@@ -40,8 +40,8 @@ class DeferredCommandBuffer {
     args.index_type = index_type;
   }

-  void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
-                       uint32_t region_count, const VkBufferCopy* regions) {
+  VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer,
+                                     uint32_t region_count) {
     static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
     const size_t header_size =
         xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
@@ -52,8 +52,12 @@ class DeferredCommandBuffer {
     args.src_buffer = src_buffer;
     args.dst_buffer = dst_buffer;
     args.region_count = region_count;
-    std::memcpy(args_ptr + header_size, regions,
-                sizeof(VkBufferCopy) * region_count);
+    return reinterpret_cast<VkBufferCopy*>(args_ptr + header_size);
+  }
+  void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+                       uint32_t region_count, const VkBufferCopy* regions) {
+    std::memcpy(CmdCopyBufferEmplace(src_buffer, dst_buffer, region_count),
+                regions, sizeof(VkBufferCopy) * region_count);
   }

   // pNext of all barriers must be null.
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index 531182ca5..1dcea8284 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -198,7 +198,18 @@ bool VulkanCommandProcessor::IssueCopy() {
   return true;
 }

-void VulkanCommandProcessor::InitializeTrace() {}
+void VulkanCommandProcessor::InitializeTrace() {
+  BeginSubmission(false);
+  bool shared_memory_submitted =
+      shared_memory_->InitializeTraceSubmitDownloads();
+  if (!shared_memory_submitted) {
+    return;
+  }
+  AwaitAllQueueOperationsCompletion();
+  if (shared_memory_submitted) {
+    shared_memory_->InitializeTraceCompleteDownloads();
+  }
+}

 void VulkanCommandProcessor::FinalizeTrace() {}

diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
index 49b9cbbb0..e8b1790b3 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
@@ -182,6 +182,8 @@ bool VulkanSharedMemory::Initialize() {
 }

 void VulkanSharedMemory::Shutdown(bool from_destructor) {
+  ResetTraceDownload();
+
   upload_buffer_pool_.reset();

   last_written_range_ = std::make_pair(0, 0);
@@ -246,6 +248,93 @@ void VulkanSharedMemory::Use(Usage usage,
   last_written_range_ = written_range;
 }

+bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
+  ResetTraceDownload();
+  PrepareForTraceDownload();
+  uint32_t download_page_count = trace_download_page_count();
+  if (!download_page_count) {
+    return false;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
+          provider, download_page_count << page_size_log2(),
+          VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+          ui::vulkan::util::MemoryPurpose::kReadback, trace_download_buffer_,
+          trace_download_buffer_memory_)) {
+    XELOGE(
+        "Shared memory: Failed to create a {} KB GPU-written memory download "
+        "buffer for frame tracing",
+        download_page_count << page_size_log2() >> 10);
+    ResetTraceDownload();
+    return false;
+  }
+
+  // TODO(Triang3l): End the render pass.
+  Use(Usage::kRead);
+  DeferredCommandBuffer& command_buffer =
+      command_processor_.deferred_command_buffer();
+
+  size_t download_range_count = trace_download_ranges().size();
+  VkBufferCopy* download_regions = command_buffer.CmdCopyBufferEmplace(
+      buffer_, trace_download_buffer_, uint32_t(download_range_count));
+  VkDeviceSize download_buffer_offset = 0;
+  for (size_t i = 0; i < download_range_count; ++i) {
+    VkBufferCopy& download_region = download_regions[i];
+    const std::pair<uint32_t, uint32_t>& download_range =
+        trace_download_ranges()[i];
+    download_region.srcOffset = download_range.first;
+    download_region.dstOffset = download_buffer_offset;
+    download_region.size = download_range.second;
+    download_buffer_offset += download_range.second;
+  }
+
+  VkBufferMemoryBarrier download_buffer_barrier;
+  download_buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+  download_buffer_barrier.pNext = nullptr;
+  download_buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+  download_buffer_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
+  download_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  download_buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  download_buffer_barrier.buffer = trace_download_buffer_;
+  download_buffer_barrier.offset = 0;
+  download_buffer_barrier.size = VK_WHOLE_SIZE;
+  command_buffer.CmdVkPipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                      VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr,
+                                      1, &download_buffer_barrier, 0, nullptr);
+
+  return true;
+}
+
+void VulkanSharedMemory::InitializeTraceCompleteDownloads() {
+  if (!trace_download_buffer_memory_) {
+    return;
+  }
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  void* download_mapping;
+  if (dfn.vkMapMemory(device, trace_download_buffer_memory_, 0, VK_WHOLE_SIZE,
+                      0, &download_mapping) == VK_SUCCESS) {
+    uint32_t download_buffer_offset = 0;
+    for (const auto& download_range : trace_download_ranges()) {
+      trace_writer_.WriteMemoryRead(
+          download_range.first, download_range.second,
+          reinterpret_cast<const uint8_t*>(download_mapping) +
+              download_buffer_offset);
+      download_buffer_offset += download_range.second;
+    }
+    dfn.vkUnmapMemory(device, trace_download_buffer_memory_);
+  } else {
+    XELOGE(
+        "Shared memory: Failed to map the GPU-written memory download buffer "
+        "for frame tracing");
+  }
+  ResetTraceDownload();
+}
+
 bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
     uint32_t offset_allocations, uint32_t length_allocations) {
   if (!length_allocations) {
@@ -296,6 +385,7 @@ bool VulkanSharedMemory::UploadRanges(
   if (upload_page_ranges.empty()) {
     return true;
   }
+  // TODO(Triang3l): End the render pass.
   // upload_page_ranges are sorted, use them to determine the range for the
   // ordering barrier.
   Use(Usage::kTransferDestination,
@@ -401,6 +491,18 @@ void VulkanSharedMemory::GetBarrier(Usage usage,
   }
 }

+void VulkanSharedMemory::ResetTraceDownload() {
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
+                                         trace_download_buffer_);
+  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
+                                         trace_download_buffer_memory_);
+  ReleaseTraceDownloadRanges();
+}
+
 }  // namespace vulkan
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
index 2d5d15a38..0d8e90813 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
@@ -53,6 +53,10 @@ class VulkanSharedMemory : public SharedMemory {

   VkBuffer buffer() const { return buffer_; }

+  // Returns true if any downloads were submitted to the command processor.
+  bool InitializeTraceSubmitDownloads();
+  void InitializeTraceCompleteDownloads();
+
 protected:
   bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
                                         uint32_t length_allocations) override;
@@ -78,6 +82,11 @@ class VulkanSharedMemory : public SharedMemory {

   std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
   std::vector<VkBufferCopy> upload_regions_;
+
+  // Created temporarily, only for downloading.
+  VkBuffer trace_download_buffer_ = VK_NULL_HANDLE;
+  VkDeviceMemory trace_download_buffer_memory_ = VK_NULL_HANDLE;
+  void ResetTraceDownload();
 };

 }  // namespace vulkan
diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc
index 703930de4..b30386793 100644
--- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc
+++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc
@@ -982,93 +982,21 @@ bool VulkanImmediateDrawer::CreateTextureResource(
   VkDeviceMemory upload_buffer_memory = VK_NULL_HANDLE;
   if (data) {
     size_t data_size = sizeof(uint32_t) * width * height;
-    VkBufferCreateInfo upload_buffer_create_info;
-    upload_buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-    upload_buffer_create_info.pNext = nullptr;
-    upload_buffer_create_info.flags = 0;
-    upload_buffer_create_info.size = VkDeviceSize(data_size);
-    upload_buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-    upload_buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-    upload_buffer_create_info.queueFamilyIndexCount = 0;
-    upload_buffer_create_info.pQueueFamilyIndices = nullptr;
-    if (dfn.vkCreateBuffer(device, &upload_buffer_create_info, nullptr,
-                           &upload_buffer) != VK_SUCCESS) {
+    uint32_t upload_buffer_memory_type;
+    if (!util::CreateDedicatedAllocationBuffer(
+            provider, VkDeviceSize(data_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+            util::MemoryPurpose::kUpload, upload_buffer, upload_buffer_memory,
+            &upload_buffer_memory_type)) {
       XELOGE(
           "Failed to create a Vulkan upload buffer for a {}x{} immediate "
          "drawer texture",
          width, height);
      FreeTextureDescriptor(descriptor_index);
      dfn.vkDestroyImageView(device, image_view, nullptr);
      dfn.vkDestroyImage(device, image, nullptr);
      dfn.vkFreeMemory(device, image_memory, nullptr);
      return false;
    }
-
-    VkMemoryAllocateInfo upload_buffer_memory_allocate_info;
-    VkMemoryRequirements upload_buffer_memory_requirements;
-    dfn.vkGetBufferMemoryRequirements(device, upload_buffer,
-                                      &upload_buffer_memory_requirements);
-    upload_buffer_memory_allocate_info.memoryTypeIndex =
-        util::ChooseHostMemoryType(
-            provider, upload_buffer_memory_requirements.memoryTypeBits, false);
-    if (upload_buffer_memory_allocate_info.memoryTypeIndex == UINT32_MAX) {
-      XELOGE(
-          "Failed to get a host-visible memory type for a Vulkan upload buffer "
-          "for a {}x{} immediate drawer texture",
-          width, height);
-      dfn.vkDestroyBuffer(device, upload_buffer, nullptr);
-      FreeTextureDescriptor(descriptor_index);
-      dfn.vkDestroyImageView(device, image_view, nullptr);
-      dfn.vkDestroyImage(device, image, nullptr);
-      dfn.vkFreeMemory(device, image_memory, nullptr);
-      return false;
-    }
-    upload_buffer_memory_allocate_info.sType =
-        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    VkMemoryDedicatedAllocateInfoKHR
-        upload_buffer_memory_dedicated_allocate_info;
-    if (dedicated_allocation_supported) {
-      upload_buffer_memory_dedicated_allocate_info.sType =
-          VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
-      upload_buffer_memory_dedicated_allocate_info.pNext = nullptr;
-      upload_buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
-      upload_buffer_memory_dedicated_allocate_info.buffer = upload_buffer;
-      upload_buffer_memory_allocate_info.pNext =
-          &upload_buffer_memory_dedicated_allocate_info;
-    } else {
-      upload_buffer_memory_allocate_info.pNext = nullptr;
-    }
-    upload_buffer_memory_allocate_info.allocationSize =
-        util::GetMappableMemorySize(provider,
-                                    upload_buffer_memory_requirements.size);
-    if (dfn.vkAllocateMemory(device, &upload_buffer_memory_allocate_info,
-                             nullptr, &upload_buffer_memory) != VK_SUCCESS) {
-      XELOGE(
-          "Failed to allocate memory for a Vulkan upload buffer for a {}x{} "
-          "immediate drawer texture",
-          width, height);
-      dfn.vkDestroyBuffer(device, upload_buffer, nullptr);
-      FreeTextureDescriptor(descriptor_index);
-      dfn.vkDestroyImageView(device, image_view, nullptr);
-      dfn.vkDestroyImage(device, image, nullptr);
-      dfn.vkFreeMemory(device, image_memory, nullptr);
-      return false;
-    }
-    if (dfn.vkBindBufferMemory(device, upload_buffer, upload_buffer_memory,
-                               0) != VK_SUCCESS) {
-      XELOGE(
-          "Failed to bind memory to a Vulkan upload buffer for a {}x{} "
-          "immediate drawer texture",
-          width, height);
-      dfn.vkDestroyBuffer(device, upload_buffer, nullptr);
-      dfn.vkFreeMemory(device, upload_buffer_memory, nullptr);
-      FreeTextureDescriptor(descriptor_index);
-      dfn.vkDestroyImageView(device, image_view, nullptr);
-      dfn.vkDestroyImage(device, image, nullptr);
-      dfn.vkFreeMemory(device, image_memory, nullptr);
-      return false;
-    }
-
     void* upload_buffer_mapping;
     if (dfn.vkMapMemory(device, upload_buffer_memory, 0, VK_WHOLE_SIZE, 0,
                         &upload_buffer_mapping) != VK_SUCCESS) {
@@ -1085,9 +1013,8 @@ bool VulkanImmediateDrawer::CreateTextureResource(
       return false;
     }
     std::memcpy(upload_buffer_mapping, data, data_size);
-    util::FlushMappedMemoryRange(
-        provider, upload_buffer_memory,
-        upload_buffer_memory_allocate_info.memoryTypeIndex);
+    util::FlushMappedMemoryRange(provider, upload_buffer_memory,
+                                 upload_buffer_memory_type);
     dfn.vkUnmapMemory(device, upload_buffer_memory);
   }

diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc
index d146beb83..49d8a949c 100644
--- a/src/xenia/ui/vulkan/vulkan_util.cc
+++ b/src/xenia/ui/vulkan/vulkan_util.cc
@@ -9,6 +9,9 @@

 #include "xenia/ui/vulkan/vulkan_util.h"

+#include <cstdint>
+
+#include "xenia/base/assert.h"
 #include "xenia/base/math.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"

@@ -43,6 +46,93 @@ void FlushMappedMemoryRange(const VulkanProvider& provider,
   provider.dfn().vkFlushMappedMemoryRanges(provider.device(), 1, &range);
 }

+bool CreateDedicatedAllocationBuffer(
+    const VulkanProvider& provider, VkDeviceSize size, VkBufferUsageFlags usage,
+    MemoryPurpose memory_purpose, VkBuffer& buffer_out,
+    VkDeviceMemory& memory_out, uint32_t* memory_type_out) {
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkBufferCreateInfo buffer_create_info;
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = nullptr;
+  buffer_create_info.flags = 0;
+  buffer_create_info.size = size;
+  buffer_create_info.usage = usage;
+  buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  buffer_create_info.queueFamilyIndexCount = 0;
+  buffer_create_info.pQueueFamilyIndices = nullptr;
+  VkBuffer buffer;
+  if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer) !=
+      VK_SUCCESS) {
+    return false;
+  }
+
+  VkMemoryRequirements memory_requirements;
+  dfn.vkGetBufferMemoryRequirements(device, buffer, &memory_requirements);
+  uint32_t memory_type = UINT32_MAX;
+  switch (memory_purpose) {
+    case MemoryPurpose::kDeviceLocal:
+      if (!xe::bit_scan_forward(memory_requirements.memoryTypeBits &
+                                    provider.memory_types_device_local(),
+                                &memory_type)) {
+        memory_type = UINT32_MAX;
+      }
+      break;
+    case MemoryPurpose::kUpload:
+    case MemoryPurpose::kReadback:
+      memory_type =
+          ChooseHostMemoryType(provider, memory_requirements.memoryTypeBits,
+                               memory_purpose == MemoryPurpose::kReadback);
+      break;
+    default:
+      assert_unhandled_case(memory_purpose);
+  }
+  if (memory_type == UINT32_MAX) {
+    dfn.vkDestroyBuffer(device, buffer, nullptr);
+    return false;
+  }
+
+  VkMemoryAllocateInfo memory_allocate_info;
+  memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info;
+  if (provider.device_extensions().khr_dedicated_allocation) {
+    memory_dedicated_allocate_info.sType =
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+    memory_dedicated_allocate_info.pNext = nullptr;
+    memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
+    memory_dedicated_allocate_info.buffer = buffer;
+    memory_allocate_info.pNext = &memory_dedicated_allocate_info;
+  } else {
+    memory_allocate_info.pNext = nullptr;
+  }
+  memory_allocate_info.allocationSize = memory_requirements.size;
+  if (memory_purpose == MemoryPurpose::kUpload ||
+      memory_purpose == MemoryPurpose::kReadback) {
+    memory_allocate_info.allocationSize =
+        GetMappableMemorySize(provider, memory_allocate_info.allocationSize);
+  }
+  memory_allocate_info.memoryTypeIndex = memory_type;
+  VkDeviceMemory memory;
+  if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
+      VK_SUCCESS) {
+    dfn.vkDestroyBuffer(device, buffer, nullptr);
+    return false;
+  }
+  if (dfn.vkBindBufferMemory(device, buffer, memory, 0) != VK_SUCCESS) {
+    dfn.vkDestroyBuffer(device, buffer, nullptr);
+    dfn.vkFreeMemory(device, memory, nullptr);
+    return false;
+  }
+
+  buffer_out = buffer;
+  memory_out = memory;
+  if (memory_type_out) {
+    *memory_type_out = memory_type;
+  }
+  return true;
+}
+
 }  // namespace util
 }  // namespace vulkan
 }  // namespace ui
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
index 61bfec617..163731699 100644
--- a/src/xenia/ui/vulkan/vulkan_util.h
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -10,6 +10,8 @@
 #ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_
 #define XENIA_UI_VULKAN_VULKAN_UTIL_H_

+#include <cstdint>
+
 #include "xenia/base/math.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"

@@ -38,6 +40,12 @@ inline bool DestroyAndNullHandle(F* destroy_function, P parent, T& handle) {
   return false;
 }

+enum class MemoryPurpose {
+  kDeviceLocal,
+  kUpload,
+  kReadback,
+};
+
 inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider,
                                           VkDeviceSize size) {
   VkDeviceSize non_coherent_atom_size =
@@ -85,6 +93,14 @@ inline void InitializeSubresourceRange(
   range.layerCount = layer_count;
 }

+// Creates a buffer backed by a dedicated allocation. If using a mappable memory
+// purpose (upload/readback), the allocation size will be aligned to
+// nonCoherentAtomSize.
+bool CreateDedicatedAllocationBuffer(
+    const VulkanProvider& provider, VkDeviceSize size, VkBufferUsageFlags usage,
+    MemoryPurpose memory_purpose, VkBuffer& buffer_out,
+    VkDeviceMemory& memory_out, uint32_t* memory_type_out = nullptr);
+
 inline VkShaderModule CreateShaderModule(const VulkanProvider& provider,
                                          const void* code, size_t code_size) {
   VkShaderModuleCreateInfo shader_module_create_info;