From 4d59f556a95426fe01cc52b1513f9238d84d999a Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 7 Oct 2020 21:03:50 +0300
Subject: [PATCH] [Vulkan] Sparse shared memory

---
 src/xenia/gpu/d3d12/d3d12_shared_memory.cc    |   8 +-
 .../gpu/vulkan/vulkan_command_processor.cc    | 127 +++++++++++--
 .../gpu/vulkan/vulkan_command_processor.h     |  29 ++-
 src/xenia/gpu/vulkan/vulkan_shared_memory.cc  | 167 ++++++++++++++----
 src/xenia/gpu/vulkan/vulkan_shared_memory.h   |  17 +-
 src/xenia/ui/vulkan/vulkan_provider.cc        |   3 +-
 src/xenia/ui/vulkan/vulkan_provider.h         |  15 +-
 7 files changed, 298 insertions(+), 68 deletions(-)

diff --git a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
index 992f9aed5..c260545ac 100644
--- a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
+++ b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
@@ -22,9 +22,9 @@
 DEFINE_bool(d3d12_tiled_shared_memory, true,
             "Enable tiled resources for shared memory emulation. Disabling "
-            "them greatly increases video memory usage - a 512 MB buffer is "
-            "created - but allows graphics debuggers that don't support tiled "
-            "resources to work.",
+            "them increases video memory usage - a 512 MB buffer is created - "
+            "but allows graphics debuggers that don't support tiled resources "
+            "to work.",
             "D3D12");
 
 namespace xe {
@@ -68,7 +68,7 @@ bool D3D12SharedMemory::Initialize() {
     XELOGGPU(
         "Direct3D 12 tiled resources are not used for shared memory "
         "emulation - video memory usage may increase significantly "
-        "because a full {} MB buffer will be created!",
+        "because a full {} MB buffer will be created",
         kBufferSize >> 20);
     if (provider.GetGraphicsAnalysis()) {
       // As of October 8th, 2018, PIX doesn't support tiled buffers.
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index c688ca6ee..531182ca5 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -63,6 +63,10 @@ void VulkanCommandProcessor::ShutdownContext() {
   const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
   VkDevice device = provider.device();
 
+  sparse_bind_wait_stage_mask_ = 0;
+  sparse_buffer_binds_.clear();
+  sparse_memory_binds_.clear();
+
   deferred_command_buffer_.Reset();
   for (const auto& command_buffer_pair : command_buffers_submitted_) {
     dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr);
@@ -78,15 +82,19 @@ void VulkanCommandProcessor::ShutdownContext() {
   frame_current_ = 1;
   frame_open_ = false;
 
-  for (const auto& semaphore :
-       submissions_in_flight_sparse_binding_semaphores_) {
+  for (const auto& semaphore : submissions_in_flight_semaphores_) {
     dfn.vkDestroySemaphore(device, semaphore.first, nullptr);
   }
-  submissions_in_flight_sparse_binding_semaphores_.clear();
+  submissions_in_flight_semaphores_.clear();
   for (VkFence& fence : submissions_in_flight_fences_) {
     dfn.vkDestroyFence(device, fence, nullptr);
   }
   submissions_in_flight_fences_.clear();
+  current_submission_wait_stage_masks_.clear();
+  for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+    dfn.vkDestroySemaphore(device, semaphore, nullptr);
+  }
+  current_submission_wait_semaphores_.clear();
   submission_completed_ = 0;
   submission_open_ = false;
 
@@ -102,6 +110,22 @@ void VulkanCommandProcessor::ShutdownContext() {
   CommandProcessor::ShutdownContext();
 }
 
+void VulkanCommandProcessor::SparseBindBuffer(
+    VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
+    VkPipelineStageFlags wait_stage_mask) {
+  if (!bind_count) {
+    return;
+  }
+  SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back();
+  buffer_bind.buffer = buffer;
+  buffer_bind.bind_offset = sparse_memory_binds_.size();
+  buffer_bind.bind_count = bind_count;
+  sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count);
+  sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds,
+                              binds + bind_count);
+  sparse_bind_wait_stage_mask_ |= wait_stage_mask;
+}
+
 void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
                                          uint32_t frontbuffer_width,
                                          uint32_t frontbuffer_height) {
@@ -233,15 +257,15 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
                                 submissions_in_flight_fences_awaited_end);
   submission_completed_ += fences_awaited;
 
-  // Reclaim semaphores used for sparse binding and graphics synchronization.
-  while (!submissions_in_flight_sparse_binding_semaphores_.empty()) {
+  // Reclaim semaphores.
+  while (!submissions_in_flight_semaphores_.empty()) {
     const auto& semaphore_submission =
-        submissions_in_flight_sparse_binding_semaphores_.front();
+        submissions_in_flight_semaphores_.front();
     if (semaphore_submission.second > submission_completed_) {
       break;
     }
     semaphores_free_.push_back(semaphore_submission.first);
-    submissions_in_flight_sparse_binding_semaphores_.pop_front();
+    submissions_in_flight_semaphores_.pop_front();
   }
 
   // Reclaim command pools.
@@ -322,14 +346,26 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
     VkFence fence;
     if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
         VK_SUCCESS) {
-      XELOGE("Failed to create a Vulkan submission fence");
+      XELOGE("Failed to create a Vulkan fence");
      // Try to submit later. Completely dropping the submission is not
       // permitted because resources would be left in an undefined state.
       return false;
     }
     fences_free_.push_back(fence);
   }
-  // TODO(Triang3l): Create a sparse binding semaphore.
+  if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) {
+    VkSemaphoreCreateInfo semaphore_create_info;
+    semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    semaphore_create_info.pNext = nullptr;
+    semaphore_create_info.flags = 0;
+    VkSemaphore semaphore;
+    if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr,
+                              &semaphore) != VK_SUCCESS) {
+      XELOGE("Failed to create a Vulkan semaphore");
+      return false;
+    }
+    semaphores_free_.push_back(semaphore);
+  }
   if (command_buffers_writable_.empty()) {
     CommandBuffer command_buffer;
     VkCommandPoolCreateInfo command_pool_create_info;
@@ -366,6 +402,52 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
   if (submission_open_) {
     shared_memory_->EndSubmission();
 
+    // Submit sparse binds earlier, before executing the deferred command
+    // buffer, to reduce latency.
+    if (!sparse_memory_binds_.empty()) {
+      sparse_buffer_bind_infos_temp_.clear();
+      sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size());
+      for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) {
+        VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info =
+            sparse_buffer_bind_infos_temp_.emplace_back();
+        sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer;
+        sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count;
+        sparse_buffer_bind_info.pBinds =
+            sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset;
+      }
+      assert_false(semaphores_free_.empty());
+      VkSemaphore bind_sparse_semaphore = semaphores_free_.back();
+      VkBindSparseInfo bind_sparse_info;
+      bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+      bind_sparse_info.pNext = nullptr;
+      bind_sparse_info.waitSemaphoreCount = 0;
+      bind_sparse_info.pWaitSemaphores = nullptr;
+      bind_sparse_info.bufferBindCount =
+          uint32_t(sparse_buffer_bind_infos_temp_.size());
+      bind_sparse_info.pBufferBinds =
+          !sparse_buffer_bind_infos_temp_.empty()
+              ? sparse_buffer_bind_infos_temp_.data()
+              : nullptr;
+      bind_sparse_info.imageOpaqueBindCount = 0;
+      bind_sparse_info.pImageOpaqueBinds = nullptr;
+      bind_sparse_info.imageBindCount = 0;
+      bind_sparse_info.pImageBinds = nullptr;
+      bind_sparse_info.signalSemaphoreCount = 1;
+      bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore;
+      if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) !=
+          VK_SUCCESS) {
+        XELOGE("Failed to submit Vulkan sparse binds");
+        return false;
+      }
+      current_submission_wait_semaphores_.push_back(bind_sparse_semaphore);
+      semaphores_free_.pop_back();
+      current_submission_wait_stage_masks_.push_back(
+          sparse_bind_wait_stage_mask_);
+      sparse_bind_wait_stage_mask_ = 0;
+      sparse_buffer_binds_.clear();
+      sparse_memory_binds_.clear();
+    }
+
     assert_false(command_buffers_writable_.empty());
     CommandBuffer command_buffer = command_buffers_writable_.back();
     if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
@@ -385,18 +467,25 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       return false;
     }
     deferred_command_buffer_.Execute(command_buffer.buffer);
-    // TODO(Triang3l): Write deferred command buffer commands.
     if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
       XELOGE("Failed to end a Vulkan command buffer");
       return false;
     }
-    // TODO(Triang3l): Submit sparse binding.
+
     VkSubmitInfo submit_info;
     submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submit_info.pNext = nullptr;
-    submit_info.waitSemaphoreCount = 0;
-    submit_info.pWaitSemaphores = nullptr;
-    submit_info.pWaitDstStageMask = nullptr;
+    if (!current_submission_wait_semaphores_.empty()) {
+      submit_info.waitSemaphoreCount =
+          uint32_t(current_submission_wait_semaphores_.size());
+      submit_info.pWaitSemaphores =
+          current_submission_wait_semaphores_.data();
+      submit_info.pWaitDstStageMask =
+          current_submission_wait_stage_masks_.data();
+    } else {
+      submit_info.waitSemaphoreCount = 0;
+      submit_info.pWaitSemaphores = nullptr;
+      submit_info.pWaitDstStageMask = nullptr;
+    }
     submit_info.commandBufferCount = 1;
     submit_info.pCommandBuffers = &command_buffer.buffer;
     submit_info.signalSemaphoreCount = 0;
@@ -412,8 +501,14 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       XELOGE("Failed to submit a Vulkan command buffer");
       return false;
     }
-    command_buffers_submitted_.push_back(
-        std::make_pair(command_buffer, GetCurrentSubmission()));
+    uint64_t submission_current = GetCurrentSubmission();
+    current_submission_wait_stage_masks_.clear();
+    for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+      submissions_in_flight_semaphores_.emplace_back(semaphore,
+                                                     submission_current);
+    }
+    current_submission_wait_semaphores_.clear();
+    command_buffers_submitted_.emplace_back(command_buffer, submission_current);
     command_buffers_writable_.pop_back();
     // Increments the current submission number, going to the next submission.
     submissions_in_flight_fences_.push_back(fence);
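
For context, this is how the new API above is meant to be driven from the
caller's side; a minimal sketch, assuming a sparse VkBuffer `buffer`, a
VkDeviceMemory `memory` allocated with a compatible memory type, and a
VulkanCommandProcessor reference `command_processor` (the shared memory code
further down in this patch is the real in-tree caller):

    // Record one bind; nothing reaches the queue here. All binds recorded
    // during the submission are flushed in a single vkQueueBindSparse in
    // EndSubmission, and the subsequent vkQueueSubmit waits on the sparse
    // binding semaphore at the requested pipeline stages.
    VkSparseMemoryBind bind;
    bind.resourceOffset = 0;  // Range start within the sparse buffer.
    bind.size = 4 << 20;      // Whole-allocation bind, 4 MB in this sketch.
    bind.memory = memory;
    bind.memoryOffset = 0;
    bind.flags = 0;
    command_processor.SparseBindBuffer(buffer, 1, &bind,
                                       VK_PIPELINE_STAGE_TRANSFER_BIT);
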
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h
index 016f9f7d0..6b9096a20 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.h
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h
@@ -54,6 +54,16 @@ class VulkanCommandProcessor : public CommandProcessor {
   }
   uint64_t GetCompletedSubmission() const { return submission_completed_; }
 
+  // Sparse binds are:
+  // - In a single submission, all submitted in one vkQueueBindSparse.
+  // - Sent to the queue without waiting for a semaphore.
+  // Thus, multiple sparse binds between the completed and the current
+  // submission, and within one submission, must not touch any overlapping
+  // memory regions.
+  void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
+                        const VkSparseMemoryBind* binds,
+                        VkPipelineStageFlags wait_stage_mask);
+
  protected:
   bool SetupContext() override;
   void ShutdownContext() override;
@@ -103,9 +113,13 @@ class VulkanCommandProcessor : public CommandProcessor {
   bool submission_open_ = false;
   uint64_t submission_completed_ = 0;
+  // In case vkQueueSubmit fails after something like a successful
+  // vkQueueBindSparse, to wait correctly on the next attempt.
+  std::vector<VkSemaphore> current_submission_wait_semaphores_;
+  std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
   std::vector<VkFence> submissions_in_flight_fences_;
   std::deque<std::pair<VkSemaphore, uint64_t>>
-      submissions_in_flight_sparse_binding_semaphores_;
+      submissions_in_flight_semaphores_;
 
   static constexpr uint32_t kMaxFramesInFlight = 3;
   bool frame_open_ = false;
@@ -124,6 +138,19 @@ class VulkanCommandProcessor : public CommandProcessor {
   std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
   DeferredCommandBuffer deferred_command_buffer_;
 
+  std::vector<VkSparseMemoryBind> sparse_memory_binds_;
+  struct SparseBufferBind {
+    VkBuffer buffer;
+    size_t bind_offset;
+    uint32_t bind_count;
+  };
+  std::vector<SparseBufferBind> sparse_buffer_binds_;
+  // SparseBufferBind converted to VkSparseBufferMemoryBindInfo in this
+  // vector on submission (because pBinds must point into
+  // sparse_memory_binds_, which may be reallocated while binds are added).
+  std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
+  VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
+
   std::unique_ptr<VulkanSharedMemory> shared_memory_;
 };
 
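
The contract stated in the header comment above is easy to violate silently;
a hedged illustration of what it permits and forbids (the handles, ranges, and
the `command_processor` reference are made up for this example):

    // Allowed: two binds to disjoint ranges within one submission. They are
    // batched into one vkQueueBindSparse, and the submission then waits with
    // the OR of both stage masks (VERTEX_SHADER | TRANSFER here).
    command_processor.SparseBindBuffer(buffer, 1, &bind_at_0mb,
                                       VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);
    command_processor.SparseBindBuffer(buffer, 1, &bind_at_4mb,
                                       VK_PIPELINE_STAGE_TRANSFER_BIT);
    // Forbidden: rebinding a range already bound since the last completed
    // submission - the binds are not ordered against each other with
    // semaphores, so which bind would win over the overlapping region is
    // undefined.
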
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
index d4a4a0049..49b9cbbb0 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
@@ -15,12 +15,20 @@
 #include <algorithm>
 
 #include "xenia/base/assert.h"
+#include "xenia/base/cvar.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/gpu/vulkan/deferred_command_buffer.h"
 #include "xenia/gpu/vulkan/vulkan_command_processor.h"
 #include "xenia/ui/vulkan/vulkan_util.h"
 
+DEFINE_bool(vulkan_sparse_shared_memory, true,
+            "Enable sparse binding for shared memory emulation. Disabling it "
+            "increases video memory usage - a 512 MB buffer is created - but "
+            "allows graphics debuggers that don't support sparse binding to "
+            "work.",
+            "Vulkan");
+
 namespace xe {
 namespace gpu {
 namespace vulkan {
@@ -43,14 +51,15 @@ bool VulkanSharedMemory::Initialize() {
   VkDevice device = provider.device();
   const VkPhysicalDeviceFeatures& device_features = provider.device_features();
 
-  VkBufferCreateInfo buffer_create_info;
-  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-  buffer_create_info.pNext = nullptr;
-  buffer_create_info.flags = 0;
   const VkBufferCreateFlags sparse_flags =
       VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
       VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
-  // TODO(Triang3l): Sparse binding.
+
+  // Try to create a sparse buffer.
+  VkBufferCreateInfo buffer_create_info;
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = nullptr;
+  buffer_create_info.flags = sparse_flags;
   buffer_create_info.size = kBufferSize;
   buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                              VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -58,39 +67,90 @@ bool VulkanSharedMemory::Initialize() {
   buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
   buffer_create_info.queueFamilyIndexCount = 0;
   buffer_create_info.pQueueFamilyIndices = nullptr;
-  VkResult buffer_create_result =
-      dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
-  if (buffer_create_result != VK_SUCCESS) {
-    if (buffer_create_info.flags & sparse_flags) {
-      buffer_create_info.flags &= ~sparse_flags;
-      buffer_create_result =
-          dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
+  if (cvars::vulkan_sparse_shared_memory &&
+      provider.IsSparseBindingSupported() &&
+      device_features.sparseResidencyBuffer) {
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
+        VK_SUCCESS) {
+      VkMemoryRequirements buffer_memory_requirements;
+      dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                        &buffer_memory_requirements);
+      if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                   provider.memory_types_device_local(),
+                               &buffer_memory_type_)) {
+        uint32_t allocation_size_log2;
+        xe::bit_scan_forward(
+            std::max(uint64_t(buffer_memory_requirements.alignment),
+                     uint64_t(1)),
+            &allocation_size_log2);
+        if (allocation_size_log2 < kBufferSizeLog2) {
+          // Maximum of 1024 allocations in the worst case for all of the
+          // buffer because of the overall 4096 allocation count limit on
+          // Windows drivers.
+          InitializeSparseHostGpuMemory(
+              std::max(allocation_size_log2,
+                       std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
+                                kBufferSizeLog2 - uint32_t(10))));
+        } else {
+          // Shouldn't happen on any real platform, but no point allocating the
+          // buffer sparsely.
+          dfn.vkDestroyBuffer(device, buffer_, nullptr);
+          buffer_ = VK_NULL_HANDLE;
+        }
+      } else {
+        XELOGE(
+            "Shared memory: Failed to get a device-local Vulkan memory type "
+            "for the sparse buffer");
+        dfn.vkDestroyBuffer(device, buffer_, nullptr);
+        buffer_ = VK_NULL_HANDLE;
+      }
+    } else {
+      XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer",
+             kBufferSize >> 20);
     }
-    if (buffer_create_result != VK_SUCCESS) {
+  }
+
+  // Create a non-sparse buffer if there were issues with the sparse buffer.
+  if (buffer_ == VK_NULL_HANDLE) {
+    XELOGGPU(
+        "Vulkan sparse binding is not used for shared memory emulation - video "
+        "memory usage may increase significantly because a full {} MB buffer "
+        "will be created",
+        kBufferSize >> 20);
+    buffer_create_info.flags &= ~sparse_flags;
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
+        VK_SUCCESS) {
       XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
              kBufferSize >> 20);
       Shutdown();
       return false;
     }
-  }
-  VkMemoryRequirements buffer_memory_requirements;
-  dfn.vkGetBufferMemoryRequirements(device, buffer_,
-                                    &buffer_memory_requirements);
-  // TODO(Triang3l): Determine sparse binding properties from memory
-  // requirements.
-  if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
-                                provider.memory_types_device_local(),
-                            &buffer_memory_type_)) {
-    XELOGE(
-        "Shared memory: Failed to get a device-local Vulkan memory type for "
-        "the buffer");
-    Shutdown();
-    return false;
-  }
-  if (!(buffer_create_info.flags & sparse_flags)) {
+    VkMemoryRequirements buffer_memory_requirements;
+    dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                      &buffer_memory_requirements);
+    if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                  provider.memory_types_device_local(),
+                              &buffer_memory_type_)) {
+      XELOGE(
+          "Shared memory: Failed to get a device-local Vulkan memory type for "
+          "the buffer");
+      Shutdown();
+      return false;
+    }
     VkMemoryAllocateInfo buffer_memory_allocate_info;
     buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    buffer_memory_allocate_info.pNext = nullptr;
+    VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
+    if (provider.device_extensions().khr_dedicated_allocation) {
+      buffer_memory_dedicated_allocate_info.sType =
+          VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+      buffer_memory_dedicated_allocate_info.pNext = nullptr;
+      buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
+      buffer_memory_dedicated_allocate_info.buffer = buffer_;
+      buffer_memory_allocate_info.pNext =
+          &buffer_memory_dedicated_allocate_info;
+    } else {
+      buffer_memory_allocate_info.pNext = nullptr;
+    }
     buffer_memory_allocate_info.allocationSize =
         buffer_memory_requirements.size;
     buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
@@ -133,8 +193,6 @@ void VulkanSharedMemory::Shutdown(bool from_destructor) {
   VkDevice device = provider.device();
 
   ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
-
-  buffer_memory_allocated_.clear();
   for (VkDeviceMemory memory : buffer_memory_) {
     dfn.vkFreeMemory(device, memory, nullptr);
   }
@@ -188,6 +246,51 @@ void VulkanSharedMemory::Use(Usage usage,
   last_written_range_ = written_range;
 }
 
+bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
+    uint32_t offset_allocations, uint32_t length_allocations) {
+  if (!length_allocations) {
+    return true;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkMemoryAllocateInfo memory_allocate_info;
+  memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  memory_allocate_info.pNext = nullptr;
+  memory_allocate_info.allocationSize =
+      length_allocations << host_gpu_memory_sparse_granularity_log2();
+  memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
+  VkDeviceMemory memory;
+  if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
+      VK_SUCCESS) {
+    XELOGE("Shared memory: Failed to allocate sparse buffer memory");
+    return false;
+  }
+  buffer_memory_.push_back(memory);
+
+  VkSparseMemoryBind bind;
+  bind.resourceOffset = offset_allocations
+                        << host_gpu_memory_sparse_granularity_log2();
+  bind.size = memory_allocate_info.allocationSize;
+  bind.memory = memory;
+  bind.memoryOffset = 0;
+  bind.flags = 0;
+  VkPipelineStageFlags bind_wait_stage_mask =
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
+  if (provider.device_features().tessellationShader) {
+    bind_wait_stage_mask |=
+        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+  }
+  command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
+
+  return true;
+}
+
 bool VulkanSharedMemory::UploadRanges(
     const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
   if (upload_page_ranges.empty()) {
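
The granularity selection in Initialize() above is dense; here is the same
computation unrolled for the 512 MB buffer mentioned in the log messages. The
value of kHostGpuMemoryOptimalSparseAllocationLog2 (4 MB) is assumed purely
for illustration - the real constant lives in the SharedMemory base class:

    #include <algorithm>
    #include <cstdint>

    constexpr uint32_t kBufferSizeLog2 = 29;  // 2^29 bytes = 512 MB.
    // Assumed illustration value: 2^22 bytes = 4 MB.
    constexpr uint32_t kHostGpuMemoryOptimalSparseAllocationLog2 = 22;

    constexpr uint32_t ChooseGranularityLog2(uint32_t allocation_size_log2) {
      // kBufferSizeLog2 - 10 caps the worst case at 2^10 = 1024 allocations
      // for the whole buffer - headroom under the 4096 allocation count limit
      // on Windows drivers.
      return std::max(allocation_size_log2,
                      std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
                               kBufferSizeLog2 - uint32_t(10)));
    }

    // For a 64 KB (2^16) buffer alignment: max(16, max(22, 29 - 10 = 19)) is
    // 22, so each sparse allocation is 4 MB, and a fully resident 512 MB
    // buffer needs at most 128 of them.
    static_assert(ChooseGranularityLog2(16) == 22, "4 MB granularity");
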
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
index a64ef17f8..2d5d15a38 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
@@ -54,14 +54,13 @@ class VulkanSharedMemory : public SharedMemory {
   VkBuffer buffer() const { return buffer_; }
 
  protected:
+  bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
+                                        uint32_t length_allocations) override;
+
   bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
                         upload_page_ranges) override;
 
  private:
-  bool IsSparse() const {
-    return buffer_allocation_size_log2_ < kBufferSizeLog2;
-  }
-
   void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask,
                   VkAccessFlags& access_mask) const;
 
@@ -70,16 +69,8 @@ class VulkanSharedMemory : public SharedMemory {
   VkBuffer buffer_ = VK_NULL_HANDLE;
   uint32_t buffer_memory_type_;
-  // Maximum of 1024 allocations in the worst case for all of the buffer
-  // because of the overall 4096 allocation count limit on Windows drivers.
-  static constexpr uint32_t kMinBufferAllocationSizeLog2 =
-      std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
-               kBufferSizeLog2 - uint32_t(10));
-  uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2;
-  // Sparse memory allocations, of different sizes.
+  // Single for non-sparse, every allocation so far for sparse.
   std::vector<VkDeviceMemory> buffer_memory_;
-  // One bit per every 2^buffer_allocation_size_log2_ of the buffer.
-  std::vector<uint64_t> buffer_memory_allocated_;
 
   // First usage will likely be uploading.
   Usage last_usage_ = Usage::kTransferDestination;
diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc
index bbe90b04c..179d8f40f 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.cc
+++ b/src/xenia/ui/vulkan/vulkan_provider.cc
@@ -30,7 +30,7 @@ DEFINE_bool(
     vulkan_validation, true,
     "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be "
     "written to the OS debug log.",
-    "GPU");
+    "Vulkan");
 DEFINE_int32(
     vulkan_device, -1,
     "Index of the physical device to use, or -1 for any compatible device.",
@@ -587,6 +587,7 @@ bool VulkanProvider::Initialize() {
   XE_VULKAN_LOAD_DFN(vkMapMemory);
   XE_VULKAN_LOAD_DFN(vkResetCommandPool);
   XE_VULKAN_LOAD_DFN(vkResetFences);
+  XE_VULKAN_LOAD_DFN(vkQueueBindSparse);
   XE_VULKAN_LOAD_DFN(vkQueuePresentKHR);
   XE_VULKAN_LOAD_DFN(vkQueueSubmit);
   XE_VULKAN_LOAD_DFN(vkUnmapMemory);
diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h
index 8d7c10ed3..9fc117a50 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.h
+++ b/src/xenia/ui/vulkan/vulkan_provider.h
@@ -190,6 +190,7 @@ class VulkanProvider : public GraphicsProvider {
     PFN_vkMapMemory vkMapMemory;
     PFN_vkResetCommandPool vkResetCommandPool;
     PFN_vkResetFences vkResetFences;
+    PFN_vkQueueBindSparse vkQueueBindSparse;
     PFN_vkQueuePresentKHR vkQueuePresentKHR;
     PFN_vkQueueSubmit vkQueueSubmit;
     PFN_vkUnmapMemory vkUnmapMemory;
@@ -205,9 +206,21 @@ class VulkanProvider : public GraphicsProvider {
     return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits,
                               fence);
   }
-  bool CanSubmitSparseBindings() const {
+  // Safer in the Xenia context - in case a sparse binding queue was not
+  // obtained for some reason.
+  bool IsSparseBindingSupported() const {
     return queue_sparse_binding_ != VK_NULL_HANDLE;
   }
+  VkResult BindSparse(uint32_t bind_info_count,
+                      const VkBindSparseInfo* bind_info, VkFence fence) {
+    assert_true(IsSparseBindingSupported());
+    std::mutex& mutex = queue_sparse_binding_ == queue_graphics_compute_
+                            ? queue_graphics_compute_mutex_
+                            : queue_sparse_binding_separate_mutex_;
+    std::lock_guard<std::mutex> lock(mutex);
+    return dfn_.vkQueueBindSparse(queue_sparse_binding_, bind_info_count,
+                                  bind_info, fence);
+  }
   VkResult Present(const VkPresentInfoKHR* present_info) {
     // FIXME(Triang3l): Allow a separate queue for present - see
     // vulkan_provider.cc for details.
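
Condensed, the provider-level flow that the command processor builds on looks
like this; a sketch only, with `provider`, a sparse VkBuffer `buffer`, a
filled VkSparseMemoryBind `bind`, and an unsignaled VkSemaphore `semaphore`
assumed to exist:

    VkSparseBufferMemoryBindInfo buffer_bind_info;
    buffer_bind_info.buffer = buffer;
    buffer_bind_info.bindCount = 1;
    buffer_bind_info.pBinds = &bind;
    VkBindSparseInfo bind_sparse_info;
    bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
    bind_sparse_info.pNext = nullptr;
    bind_sparse_info.waitSemaphoreCount = 0;
    bind_sparse_info.pWaitSemaphores = nullptr;
    bind_sparse_info.bufferBindCount = 1;
    bind_sparse_info.pBufferBinds = &buffer_bind_info;
    bind_sparse_info.imageOpaqueBindCount = 0;
    bind_sparse_info.pImageOpaqueBinds = nullptr;
    bind_sparse_info.imageBindCount = 0;
    bind_sparse_info.pImageBinds = nullptr;
    bind_sparse_info.signalSemaphoreCount = 1;
    bind_sparse_info.pSignalSemaphores = &semaphore;
    // BindSparse locks the graphics/compute queue mutex when the sparse
    // binding queue aliases the graphics queue - access to a VkQueue must be
    // externally synchronized, so a separate mutex is only correct for a
    // genuinely separate queue. The next vkQueueSubmit then waits on
    // `semaphore` before the stages that first touch the newly bound range.
    if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) !=
        VK_SUCCESS) {
      // The bind never reached the queue; the semaphore will not be signaled,
      // so it must not be waited on.
    }
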