From 4d59f556a95426fe01cc52b1513f9238d84d999a Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 7 Oct 2020 21:03:50 +0300
Subject: [PATCH] [Vulkan] Sparse shared memory

---
 src/xenia/gpu/d3d12/d3d12_shared_memory.cc    |   8 +-
 .../gpu/vulkan/vulkan_command_processor.cc    | 127 +++++++++++--
 .../gpu/vulkan/vulkan_command_processor.h     |  29 ++-
 src/xenia/gpu/vulkan/vulkan_shared_memory.cc  | 167 ++++++++++++++----
 src/xenia/gpu/vulkan/vulkan_shared_memory.h   |  17 +-
 src/xenia/ui/vulkan/vulkan_provider.cc        |   3 +-
 src/xenia/ui/vulkan/vulkan_provider.h         |  15 +-
 7 files changed, 298 insertions(+), 68 deletions(-)

diff --git a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
index 992f9aed5..c260545ac 100644
--- a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
+++ b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc
@@ -22,9 +22,9 @@
 DEFINE_bool(d3d12_tiled_shared_memory, true,
             "Enable tiled resources for shared memory emulation. Disabling "
-            "them greatly increases video memory usage - a 512 MB buffer is "
-            "created - but allows graphics debuggers that don't support tiled "
-            "resources to work.",
+            "them increases video memory usage - a 512 MB buffer is created - "
+            "but allows graphics debuggers that don't support tiled resources "
+            "to work.",
             "D3D12");
 
 namespace xe {
@@ -68,7 +68,7 @@ bool D3D12SharedMemory::Initialize() {
     XELOGGPU(
         "Direct3D 12 tiled resources are not used for shared memory "
         "emulation - video memory usage may increase significantly "
-        "because a full {} MB buffer will be created!",
+        "because a full {} MB buffer will be created",
         kBufferSize >> 20);
     if (provider.GetGraphicsAnalysis()) {
       // As of October 8th, 2018, PIX doesn't support tiled buffers.
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index c688ca6ee..531182ca5 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -63,6 +63,10 @@ void VulkanCommandProcessor::ShutdownContext() {
   const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
   VkDevice device = provider.device();
 
+  sparse_bind_wait_stage_mask_ = 0;
+  sparse_buffer_binds_.clear();
+  sparse_memory_binds_.clear();
+
   deferred_command_buffer_.Reset();
   for (const auto& command_buffer_pair : command_buffers_submitted_) {
     dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr);
@@ -78,15 +82,19 @@ void VulkanCommandProcessor::ShutdownContext() {
   frame_current_ = 1;
   frame_open_ = false;
 
-  for (const auto& semaphore :
-       submissions_in_flight_sparse_binding_semaphores_) {
+  for (const auto& semaphore : submissions_in_flight_semaphores_) {
     dfn.vkDestroySemaphore(device, semaphore.first, nullptr);
   }
-  submissions_in_flight_sparse_binding_semaphores_.clear();
+  submissions_in_flight_semaphores_.clear();
   for (VkFence& fence : submissions_in_flight_fences_) {
     dfn.vkDestroyFence(device, fence, nullptr);
   }
   submissions_in_flight_fences_.clear();
+  current_submission_wait_stage_masks_.clear();
+  for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+    dfn.vkDestroySemaphore(device, semaphore, nullptr);
+  }
+  current_submission_wait_semaphores_.clear();
   submission_completed_ = 0;
   submission_open_ = false;
 
@@ -102,6 +110,22 @@ void VulkanCommandProcessor::ShutdownContext() {
   CommandProcessor::ShutdownContext();
 }
 
+void VulkanCommandProcessor::SparseBindBuffer(
+    VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
+    VkPipelineStageFlags wait_stage_mask) {
+  if (!bind_count) {
+    return;
+  }
+  SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back();
+  buffer_bind.buffer = buffer;
+  buffer_bind.bind_offset = sparse_memory_binds_.size();
+  buffer_bind.bind_count = bind_count;
+  sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count);
+  sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds,
+                              binds + bind_count);
+  sparse_bind_wait_stage_mask_ |= wait_stage_mask;
+}
+
 void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
                                          uint32_t frontbuffer_width,
                                          uint32_t frontbuffer_height) {
@@ -233,15 +257,15 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
                                 submissions_in_flight_fences_awaited_end);
   submission_completed_ += fences_awaited;
 
-  // Reclaim semaphores used for sparse binding and graphics synchronization.
-  while (!submissions_in_flight_sparse_binding_semaphores_.empty()) {
+  // Reclaim semaphores.
+  while (!submissions_in_flight_semaphores_.empty()) {
     const auto& semaphore_submission =
-        submissions_in_flight_sparse_binding_semaphores_.front();
+        submissions_in_flight_semaphores_.front();
     if (semaphore_submission.second > submission_completed_) {
       break;
     }
     semaphores_free_.push_back(semaphore_submission.first);
-    submissions_in_flight_sparse_binding_semaphores_.pop_front();
+    submissions_in_flight_semaphores_.pop_front();
   }
 
   // Reclaim command pools.
@@ -322,14 +346,26 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
     VkFence fence;
     if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
         VK_SUCCESS) {
-      XELOGE("Failed to create a Vulkan submission fence");
+      XELOGE("Failed to create a Vulkan fence");
      // Try to submit later. Completely dropping the submission is not
       // permitted because resources would be left in an undefined state.
       return false;
     }
     fences_free_.push_back(fence);
   }
-  // TODO(Triang3l): Create a sparse binding semaphore.
+  if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) {
+    VkSemaphoreCreateInfo semaphore_create_info;
+    semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    semaphore_create_info.pNext = nullptr;
+    semaphore_create_info.flags = 0;
+    VkSemaphore semaphore;
+    if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr,
+                              &semaphore) != VK_SUCCESS) {
+      XELOGE("Failed to create a Vulkan semaphore");
+      return false;
+    }
+    semaphores_free_.push_back(semaphore);
+  }
   if (command_buffers_writable_.empty()) {
     CommandBuffer command_buffer;
     VkCommandPoolCreateInfo command_pool_create_info;
@@ -366,6 +402,52 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
   if (submission_open_) {
     shared_memory_->EndSubmission();
 
+    // Submit sparse binds earlier, before executing the deferred command
+    // buffer, to reduce latency.
+    if (!sparse_memory_binds_.empty()) {
+      sparse_buffer_bind_infos_temp_.clear();
+      sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size());
+      for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) {
+        VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info =
+            sparse_buffer_bind_infos_temp_.emplace_back();
+        sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer;
+        sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count;
+        sparse_buffer_bind_info.pBinds =
+            sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset;
+      }
+      assert_false(semaphores_free_.empty());
+      VkSemaphore bind_sparse_semaphore = semaphores_free_.back();
+      VkBindSparseInfo bind_sparse_info;
+      bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+      bind_sparse_info.pNext = nullptr;
+      bind_sparse_info.waitSemaphoreCount = 0;
+      bind_sparse_info.pWaitSemaphores = nullptr;
+      bind_sparse_info.bufferBindCount =
+          uint32_t(sparse_buffer_bind_infos_temp_.size());
+      bind_sparse_info.pBufferBinds =
+          !sparse_buffer_bind_infos_temp_.empty()
+              ? sparse_buffer_bind_infos_temp_.data()
+              : nullptr;
+      bind_sparse_info.imageOpaqueBindCount = 0;
+      bind_sparse_info.pImageOpaqueBinds = nullptr;
+      bind_sparse_info.imageBindCount = 0;
+      bind_sparse_info.pImageBinds = nullptr;
+      bind_sparse_info.signalSemaphoreCount = 1;
+      bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore;
+      if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) !=
+          VK_SUCCESS) {
+        XELOGE("Failed to submit Vulkan sparse binds");
+        return false;
+      }
+      current_submission_wait_semaphores_.push_back(bind_sparse_semaphore);
+      semaphores_free_.pop_back();
+      current_submission_wait_stage_masks_.push_back(
+          sparse_bind_wait_stage_mask_);
+      sparse_bind_wait_stage_mask_ = 0;
+      sparse_buffer_binds_.clear();
+      sparse_memory_binds_.clear();
+    }
+
     assert_false(command_buffers_writable_.empty());
     CommandBuffer command_buffer = command_buffers_writable_.back();
     if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
@@ -385,18 +467,25 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       return false;
     }
     deferred_command_buffer_.Execute(command_buffer.buffer);
-    // TODO(Triang3l): Write deferred command buffer commands.
     if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
       XELOGE("Failed to end a Vulkan command buffer");
       return false;
     }
-    // TODO(Triang3l): Submit sparse binding.
+
     VkSubmitInfo submit_info;
     submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submit_info.pNext = nullptr;
-    submit_info.waitSemaphoreCount = 0;
-    submit_info.pWaitSemaphores = nullptr;
-    submit_info.pWaitDstStageMask = nullptr;
+    if (!current_submission_wait_semaphores_.empty()) {
+      submit_info.waitSemaphoreCount =
+          uint32_t(current_submission_wait_semaphores_.size());
+      submit_info.pWaitSemaphores =
+          current_submission_wait_semaphores_.data();
+      submit_info.pWaitDstStageMask =
+          current_submission_wait_stage_masks_.data();
+    } else {
+      submit_info.waitSemaphoreCount = 0;
+      submit_info.pWaitSemaphores = nullptr;
+      submit_info.pWaitDstStageMask = nullptr;
+    }
     submit_info.commandBufferCount = 1;
     submit_info.pCommandBuffers = &command_buffer.buffer;
     submit_info.signalSemaphoreCount = 0;
@@ -412,8 +501,14 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       XELOGE("Failed to submit a Vulkan command buffer");
       return false;
     }
-    command_buffers_submitted_.push_back(
-        std::make_pair(command_buffer, GetCurrentSubmission()));
+    uint64_t submission_current = GetCurrentSubmission();
+    current_submission_wait_stage_masks_.clear();
+    for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+      submissions_in_flight_semaphores_.emplace_back(semaphore,
+                                                     submission_current);
+    }
+    current_submission_wait_semaphores_.clear();
+    command_buffers_submitted_.emplace_back(command_buffer, submission_current);
     command_buffers_writable_.pop_back();
     // Increments the current submission number, going to the next submission.
     submissions_in_flight_fences_.push_back(fence);
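
For context, this is how the new API above is meant to be driven from the
caller's side; a minimal sketch, assuming a sparse VkBuffer `buffer`, a
VkDeviceMemory `memory` allocated with a compatible memory type, and a
VulkanCommandProcessor reference `command_processor` (the shared memory code
further down in this patch is the real in-tree caller):

    // Record one bind; nothing reaches the queue here. All binds recorded
    // during the submission are flushed in a single vkQueueBindSparse in
    // EndSubmission, and the subsequent vkQueueSubmit waits on the sparse
    // binding semaphore at the requested pipeline stages.
    VkSparseMemoryBind bind;
    bind.resourceOffset = 0;  // Range start within the sparse buffer.
    bind.size = 4 << 20;      // Whole-allocation bind, 4 MB in this sketch.
    bind.memory = memory;
    bind.memoryOffset = 0;
    bind.flags = 0;
    command_processor.SparseBindBuffer(buffer, 1, &bind,
                                       VK_PIPELINE_STAGE_TRANSFER_BIT);
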
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h
index 016f9f7d0..6b9096a20 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.h
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h
@@ -54,6 +54,16 @@ class VulkanCommandProcessor : public CommandProcessor {
   }
   uint64_t GetCompletedSubmission() const { return submission_completed_; }
 
+  // Sparse binds are:
+  // - In a single submission, all submitted in one vkQueueBindSparse.
+  // - Sent to the queue without waiting for a semaphore.
+  // Thus, multiple sparse binds between the completed and the current
+  // submission, and within one submission, must not touch any overlapping
+  // memory regions.
+  void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
+                        const VkSparseMemoryBind* binds,
+                        VkPipelineStageFlags wait_stage_mask);
+
  protected:
   bool SetupContext() override;
   void ShutdownContext() override;
@@ -103,9 +113,13 @@ class VulkanCommandProcessor : public CommandProcessor {
   bool submission_open_ = false;
   uint64_t submission_completed_ = 0;
+  // In case vkQueueSubmit fails after something like a successful
+  // vkQueueBindSparse, to wait correctly on the next attempt.
+  std::vector<VkSemaphore> current_submission_wait_semaphores_;
+  std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
   std::vector<VkFence> submissions_in_flight_fences_;
   std::deque<std::pair<VkSemaphore, uint64_t>>
-      submissions_in_flight_sparse_binding_semaphores_;
+      submissions_in_flight_semaphores_;
 
   static constexpr uint32_t kMaxFramesInFlight = 3;
   bool frame_open_ = false;
@@ -124,6 +138,19 @@ class VulkanCommandProcessor : public CommandProcessor {
   std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
   DeferredCommandBuffer deferred_command_buffer_;
 
+  std::vector<VkSparseMemoryBind> sparse_memory_binds_;
+  struct SparseBufferBind {
+    VkBuffer buffer;
+    size_t bind_offset;
+    uint32_t bind_count;
+  };
+  std::vector<SparseBufferBind> sparse_buffer_binds_;
+  // SparseBufferBind converted to VkSparseBufferMemoryBindInfo in this
+  // vector on submission (because pBinds must point into
+  // sparse_memory_binds_, which may be reallocated while binds are added).
+  std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
+  VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
+
   std::unique_ptr<VulkanSharedMemory> shared_memory_;
 };
 
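
The contract stated in the header comment above is easy to violate silently;
a hedged illustration of what it permits and forbids (the handles, ranges, and
the `command_processor` reference are made up for this example):

    // Allowed: two binds to disjoint ranges within one submission. They are
    // batched into one vkQueueBindSparse, and the submission then waits with
    // the OR of both stage masks (VERTEX_SHADER | TRANSFER here).
    command_processor.SparseBindBuffer(buffer, 1, &bind_at_0mb,
                                       VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);
    command_processor.SparseBindBuffer(buffer, 1, &bind_at_4mb,
                                       VK_PIPELINE_STAGE_TRANSFER_BIT);
    // Forbidden: rebinding a range already bound since the last completed
    // submission - the binds are not ordered against each other with
    // semaphores, so which bind would win over the overlapping region is
    // undefined.
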
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
index d4a4a0049..49b9cbbb0 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc
@@ -15,12 +15,20 @@
 #include <algorithm>
 
 #include "xenia/base/assert.h"
+#include "xenia/base/cvar.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/gpu/vulkan/deferred_command_buffer.h"
 #include "xenia/gpu/vulkan/vulkan_command_processor.h"
 #include "xenia/ui/vulkan/vulkan_util.h"
 
+DEFINE_bool(vulkan_sparse_shared_memory, true,
+            "Enable sparse binding for shared memory emulation. Disabling it "
+            "increases video memory usage - a 512 MB buffer is created - but "
+            "allows graphics debuggers that don't support sparse binding to "
+            "work.",
+            "Vulkan");
+
 namespace xe {
 namespace gpu {
 namespace vulkan {
@@ -43,14 +51,15 @@ bool VulkanSharedMemory::Initialize() {
   VkDevice device = provider.device();
   const VkPhysicalDeviceFeatures& device_features = provider.device_features();
 
-  VkBufferCreateInfo buffer_create_info;
-  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-  buffer_create_info.pNext = nullptr;
-  buffer_create_info.flags = 0;
   const VkBufferCreateFlags sparse_flags =
       VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
       VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
-  // TODO(Triang3l): Sparse binding.
+
+  // Try to create a sparse buffer.
+  VkBufferCreateInfo buffer_create_info;
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = nullptr;
+  buffer_create_info.flags = sparse_flags;
   buffer_create_info.size = kBufferSize;
   buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                              VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -58,39 +67,90 @@ bool VulkanSharedMemory::Initialize() {
   buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
   buffer_create_info.queueFamilyIndexCount = 0;
   buffer_create_info.pQueueFamilyIndices = nullptr;
-  VkResult buffer_create_result =
-      dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
-  if (buffer_create_result != VK_SUCCESS) {
-    if (buffer_create_info.flags & sparse_flags) {
-      buffer_create_info.flags &= ~sparse_flags;
-      buffer_create_result =
-          dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
+  if (cvars::vulkan_sparse_shared_memory &&
+      provider.IsSparseBindingSupported() &&
+      device_features.sparseResidencyBuffer) {
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
+        VK_SUCCESS) {
+      VkMemoryRequirements buffer_memory_requirements;
+      dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                        &buffer_memory_requirements);
+      if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                   provider.memory_types_device_local(),
+                               &buffer_memory_type_)) {
+        uint32_t allocation_size_log2;
+        xe::bit_scan_forward(
+            std::max(uint64_t(buffer_memory_requirements.alignment),
+                     uint64_t(1)),
+            &allocation_size_log2);
+        if (allocation_size_log2 < kBufferSizeLog2) {
+          // Maximum of 1024 allocations in the worst case for all of the
+          // buffer because of the overall 4096 allocation count limit on
+          // Windows drivers.
+          InitializeSparseHostGpuMemory(
+              std::max(allocation_size_log2,
+                       std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
+                                kBufferSizeLog2 - uint32_t(10))));
+        } else {
+          // Shouldn't happen on any real platform, but no point allocating the
+          // buffer sparsely.
+          dfn.vkDestroyBuffer(device, buffer_, nullptr);
+          buffer_ = VK_NULL_HANDLE;
+        }
+      } else {
+        XELOGE(
+            "Shared memory: Failed to get a device-local Vulkan memory type "
+            "for the sparse buffer");
+        dfn.vkDestroyBuffer(device, buffer_, nullptr);
+        buffer_ = VK_NULL_HANDLE;
+      }
+    } else {
+      XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer",
+             kBufferSize >> 20);
     }
-    if (buffer_create_result != VK_SUCCESS) {
+  }
+
+  // Create a non-sparse buffer if there were issues with the sparse buffer.
+  if (buffer_ == VK_NULL_HANDLE) {
+    XELOGGPU(
+        "Vulkan sparse binding is not used for shared memory emulation - video "
+        "memory usage may increase significantly because a full {} MB buffer "
+        "will be created",
+        kBufferSize >> 20);
+    buffer_create_info.flags &= ~sparse_flags;
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
+        VK_SUCCESS) {
       XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
              kBufferSize >> 20);
       Shutdown();
       return false;
     }
-  }
-  VkMemoryRequirements buffer_memory_requirements;
-  dfn.vkGetBufferMemoryRequirements(device, buffer_,
-                                    &buffer_memory_requirements);
-  // TODO(Triang3l): Determine sparse binding properties from memory
-  // requirements.
-  if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
-                                provider.memory_types_device_local(),
-                            &buffer_memory_type_)) {
-    XELOGE(
-        "Shared memory: Failed to get a device-local Vulkan memory type for "
-        "the buffer");
-    Shutdown();
-    return false;
-  }
-  if (!(buffer_create_info.flags & sparse_flags)) {
+    VkMemoryRequirements buffer_memory_requirements;
+    dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                      &buffer_memory_requirements);
+    if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                  provider.memory_types_device_local(),
+                              &buffer_memory_type_)) {
+      XELOGE(
+          "Shared memory: Failed to get a device-local Vulkan memory type for "
+          "the buffer");
+      Shutdown();
+      return false;
+    }
     VkMemoryAllocateInfo buffer_memory_allocate_info;
     buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-    buffer_memory_allocate_info.pNext = nullptr;
+    VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
+    if (provider.device_extensions().khr_dedicated_allocation) {
+      buffer_memory_dedicated_allocate_info.sType =
+          VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+      buffer_memory_dedicated_allocate_info.pNext = nullptr;
+      buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
+      buffer_memory_dedicated_allocate_info.buffer = buffer_;
+      buffer_memory_allocate_info.pNext =
+          &buffer_memory_dedicated_allocate_info;
+    } else {
+      buffer_memory_allocate_info.pNext = nullptr;
+    }
     buffer_memory_allocate_info.allocationSize =
         buffer_memory_requirements.size;
     buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
@@ -133,8 +193,6 @@ void VulkanSharedMemory::Shutdown(bool from_destructor) {
   VkDevice device = provider.device();
 
   ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
-
-  buffer_memory_allocated_.clear();
   for (VkDeviceMemory memory : buffer_memory_) {
     dfn.vkFreeMemory(device, memory, nullptr);
   }
@@ -188,6 +246,51 @@ void VulkanSharedMemory::Use(Usage usage,
   last_written_range_ = written_range;
 }
 
+bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
+    uint32_t offset_allocations, uint32_t length_allocations) {
+  if (!length_allocations) {
+    return true;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkMemoryAllocateInfo memory_allocate_info;
+  memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  memory_allocate_info.pNext = nullptr;
+  memory_allocate_info.allocationSize =
+      length_allocations << host_gpu_memory_sparse_granularity_log2();
+  memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
+  VkDeviceMemory memory;
+  if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
+      VK_SUCCESS) {
+    XELOGE("Shared memory: Failed to allocate sparse buffer memory");
+    return false;
+  }
+  buffer_memory_.push_back(memory);
+
+  VkSparseMemoryBind bind;
+  bind.resourceOffset = offset_allocations
+                        << host_gpu_memory_sparse_granularity_log2();
+  bind.size = memory_allocate_info.allocationSize;
+  bind.memory = memory;
+  bind.memoryOffset = 0;
+  bind.flags = 0;
+  VkPipelineStageFlags bind_wait_stage_mask =
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
+  if (provider.device_features().tessellationShader) {
+    bind_wait_stage_mask |=
+        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+  }
+  command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
+
+  return true;
+}
+
 bool VulkanSharedMemory::UploadRanges(
     const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
   if (upload_page_ranges.empty()) {
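
The granularity selection in Initialize() above is dense; here is the same
computation unrolled for the 512 MB buffer mentioned in the log messages. The
value of kHostGpuMemoryOptimalSparseAllocationLog2 (4 MB) is assumed purely
for illustration - the real constant lives in the SharedMemory base class:

    #include <algorithm>
    #include <cstdint>

    constexpr uint32_t kBufferSizeLog2 = 29;  // 2^29 bytes = 512 MB.
    // Assumed illustration value: 2^22 bytes = 4 MB.
    constexpr uint32_t kHostGpuMemoryOptimalSparseAllocationLog2 = 22;

    constexpr uint32_t ChooseGranularityLog2(uint32_t allocation_size_log2) {
      // kBufferSizeLog2 - 10 caps the worst case at 2^10 = 1024 allocations
      // for the whole buffer - headroom under the 4096 allocation count limit
      // on Windows drivers.
      return std::max(allocation_size_log2,
                      std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
                               kBufferSizeLog2 - uint32_t(10)));
    }

    // For a 64 KB (2^16) buffer alignment: max(16, max(22, 29 - 10 = 19)) is
    // 22, so each sparse allocation is 4 MB, and a fully resident 512 MB
    // buffer needs at most 128 of them.
    static_assert(ChooseGranularityLog2(16) == 22, "4 MB granularity");
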
diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
index a64ef17f8..2d5d15a38 100644
--- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h
+++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h
@@ -54,14 +54,13 @@ class VulkanSharedMemory : public SharedMemory {
   VkBuffer buffer() const { return buffer_; }
 
  protected:
+  bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
+                                        uint32_t length_allocations) override;
+
   bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
                         upload_page_ranges) override;
 
  private:
-  bool IsSparse() const {
-    return buffer_allocation_size_log2_ < kBufferSizeLog2;
-  }
-
   void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask,
                   VkAccessFlags& access_mask) const;
 
@@ -70,16 +69,8 @@ class VulkanSharedMemory : public SharedMemory {
   VkBuffer buffer_ = VK_NULL_HANDLE;
   uint32_t buffer_memory_type_;
-  // Maximum of 1024 allocations in the worst case for all of the buffer
-  // because of the overall 4096 allocation count limit on Windows drivers.
-  static constexpr uint32_t kMinBufferAllocationSizeLog2 =
-      std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
-               kBufferSizeLog2 - uint32_t(10));
-  uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2;
-  // Sparse memory allocations, of different sizes.
+  // Single for non-sparse, every allocation so far for sparse.
   std::vector<VkDeviceMemory> buffer_memory_;
-  // One bit per every 2^buffer_allocation_size_log2_ of the buffer.
-  std::vector<uint64_t> buffer_memory_allocated_;
 
   // First usage will likely be uploading.
   Usage last_usage_ = Usage::kTransferDestination;
diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc
index bbe90b04c..179d8f40f 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.cc
+++ b/src/xenia/ui/vulkan/vulkan_provider.cc
@@ -30,7 +30,7 @@ DEFINE_bool(
     vulkan_validation, true,
     "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be "
     "written to the OS debug log.",
-    "GPU");
+    "Vulkan");
 DEFINE_int32(
     vulkan_device, -1,
     "Index of the physical device to use, or -1 for any compatible device.",
@@ -587,6 +587,7 @@ bool VulkanProvider::Initialize() {
   XE_VULKAN_LOAD_DFN(vkMapMemory);
   XE_VULKAN_LOAD_DFN(vkResetCommandPool);
   XE_VULKAN_LOAD_DFN(vkResetFences);
+  XE_VULKAN_LOAD_DFN(vkQueueBindSparse);
   XE_VULKAN_LOAD_DFN(vkQueuePresentKHR);
   XE_VULKAN_LOAD_DFN(vkQueueSubmit);
   XE_VULKAN_LOAD_DFN(vkUnmapMemory);
diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h
index 8d7c10ed3..9fc117a50 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.h
+++ b/src/xenia/ui/vulkan/vulkan_provider.h
@@ -190,6 +190,7 @@ class VulkanProvider : public GraphicsProvider {
     PFN_vkMapMemory vkMapMemory;
     PFN_vkResetCommandPool vkResetCommandPool;
     PFN_vkResetFences vkResetFences;
+    PFN_vkQueueBindSparse vkQueueBindSparse;
     PFN_vkQueuePresentKHR vkQueuePresentKHR;
     PFN_vkQueueSubmit vkQueueSubmit;
     PFN_vkUnmapMemory vkUnmapMemory;
@@ -205,9 +206,21 @@ class VulkanProvider : public GraphicsProvider {
     return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits,
                               fence);
   }
-  bool CanSubmitSparseBindings() const {
+  // Safer in the Xenia context - in case a sparse binding queue was not
+  // obtained for some reason.
+  bool IsSparseBindingSupported() const {
     return queue_sparse_binding_ != VK_NULL_HANDLE;
   }
+  VkResult BindSparse(uint32_t bind_info_count,
+                      const VkBindSparseInfo* bind_info, VkFence fence) {
+    assert_true(IsSparseBindingSupported());
+    std::mutex& mutex = queue_sparse_binding_ == queue_graphics_compute_
+                            ? queue_graphics_compute_mutex_
+                            : queue_sparse_binding_separate_mutex_;
+    std::lock_guard<std::mutex> lock(mutex);
+    return dfn_.vkQueueBindSparse(queue_sparse_binding_, bind_info_count,
+                                  bind_info, fence);
+  }
   VkResult Present(const VkPresentInfoKHR* present_info) {
     // FIXME(Triang3l): Allow a separate queue for present - see
     // vulkan_provider.cc for details.
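
Condensed, the provider-level flow that the command processor builds on looks
like this; a sketch only, with `provider`, a sparse VkBuffer `buffer`, a
filled VkSparseMemoryBind `bind`, and an unsignaled VkSemaphore `semaphore`
assumed to exist:

    VkSparseBufferMemoryBindInfo buffer_bind_info;
    buffer_bind_info.buffer = buffer;
    buffer_bind_info.bindCount = 1;
    buffer_bind_info.pBinds = &bind;
    VkBindSparseInfo bind_sparse_info;
    bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
    bind_sparse_info.pNext = nullptr;
    bind_sparse_info.waitSemaphoreCount = 0;
    bind_sparse_info.pWaitSemaphores = nullptr;
    bind_sparse_info.bufferBindCount = 1;
    bind_sparse_info.pBufferBinds = &buffer_bind_info;
    bind_sparse_info.imageOpaqueBindCount = 0;
    bind_sparse_info.pImageOpaqueBinds = nullptr;
    bind_sparse_info.imageBindCount = 0;
    bind_sparse_info.pImageBinds = nullptr;
    bind_sparse_info.signalSemaphoreCount = 1;
    bind_sparse_info.pSignalSemaphores = &semaphore;
    // BindSparse locks the graphics/compute queue mutex when the sparse
    // binding queue aliases the graphics queue - access to a VkQueue must be
    // externally synchronized, so a separate mutex is only correct for a
    // genuinely separate queue. The next vkQueueSubmit then waits on
    // `semaphore` before the stages that first touch the newly bound range.
    if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) !=
        VK_SUCCESS) {
      // The bind never reached the queue; the semaphore will not be signaled,
      // so it must not be waited on.
    }
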