[Vulkan] Sparse shared memory

commit 4d59f556a9
parent fee2189d39
@@ -22,9 +22,9 @@

 DEFINE_bool(d3d12_tiled_shared_memory, true,
             "Enable tiled resources for shared memory emulation. Disabling "
-            "them greatly increases video memory usage - a 512 MB buffer is "
-            "created - but allows graphics debuggers that don't support tiled "
-            "resources to work.",
+            "them greatly video memory usage - a 512 MB buffer is created - "
+            "but allows graphics debuggers that don't support tiled resources "
+            "to work.",
             "D3D12");

 namespace xe {
@@ -68,7 +68,7 @@ bool D3D12SharedMemory::Initialize() {
     XELOGGPU(
         "Direct3D 12 tiled resources are not used for shared memory "
         "emulation - video memory usage may increase significantly "
-        "because a full {} MB buffer will be created!",
+        "because a full {} MB buffer will be created",
         kBufferSize >> 20);
     if (provider.GetGraphicsAnalysis()) {
       // As of October 8th, 2018, PIX doesn't support tiled buffers.
@@ -63,6 +63,10 @@ void VulkanCommandProcessor::ShutdownContext() {
   const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
   VkDevice device = provider.device();

+  sparse_bind_wait_stage_mask_ = 0;
+  sparse_buffer_binds_.clear();
+  sparse_memory_binds_.clear();
+
   deferred_command_buffer_.Reset();
   for (const auto& command_buffer_pair : command_buffers_submitted_) {
     dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr);
@@ -78,15 +82,19 @@ void VulkanCommandProcessor::ShutdownContext() {
   frame_current_ = 1;
   frame_open_ = false;

-  for (const auto& semaphore :
-       submissions_in_flight_sparse_binding_semaphores_) {
+  for (const auto& semaphore : submissions_in_flight_semaphores_) {
     dfn.vkDestroySemaphore(device, semaphore.first, nullptr);
   }
-  submissions_in_flight_sparse_binding_semaphores_.clear();
+  submissions_in_flight_semaphores_.clear();
   for (VkFence& fence : submissions_in_flight_fences_) {
     dfn.vkDestroyFence(device, fence, nullptr);
   }
   submissions_in_flight_fences_.clear();
+  current_submission_wait_stage_masks_.clear();
+  for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+    dfn.vkDestroySemaphore(device, semaphore, nullptr);
+  }
+  current_submission_wait_semaphores_.clear();
   submission_completed_ = 0;
   submission_open_ = false;

@@ -102,6 +110,22 @@ void VulkanCommandProcessor::ShutdownContext() {
   CommandProcessor::ShutdownContext();
 }

+void VulkanCommandProcessor::SparseBindBuffer(
+    VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
+    VkPipelineStageFlags wait_stage_mask) {
+  if (!bind_count) {
+    return;
+  }
+  SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back();
+  buffer_bind.buffer = buffer;
+  buffer_bind.bind_offset = sparse_memory_binds_.size();
+  buffer_bind.bind_count = bind_count;
+  sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count);
+  sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds,
+                              binds + bind_count);
+  sparse_bind_wait_stage_mask_ |= wait_stage_mask;
+}
+
 void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
                                          uint32_t frontbuffer_width,
                                          uint32_t frontbuffer_height) {
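SparseBindBuffer above only records work: binds accumulate in plain vectors, and nothing reaches the queue until EndSubmission flushes them. A minimal standalone sketch of the same deferred-bind pattern follows (hypothetical names and globals, not Xenia's actual API); the key detail is storing an offset into the flat bind array rather than a pointer, because the vector may reallocate as later binds are appended.

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

struct PendingBufferBinds {
  VkBuffer buffer;
  size_t bind_offset;   // Index of the first bind in the shared flat array.
  uint32_t bind_count;
};

std::vector<VkSparseMemoryBind> g_binds;         // Flat storage for all binds.
std::vector<PendingBufferBinds> g_buffer_binds;  // Per-buffer ranges into it.

void EnqueueSparseBinds(VkBuffer buffer, const VkSparseMemoryBind* binds,
                        uint32_t count) {
  if (!count) {
    return;
  }
  // Record an offset, not a pointer - g_binds may reallocate while more binds
  // are appended, so pointers into it are only formed at flush time.
  g_buffer_binds.push_back({buffer, g_binds.size(), count});
  g_binds.insert(g_binds.end(), binds, binds + count);
}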
@@ -233,15 +257,15 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
       submissions_in_flight_fences_awaited_end);
   submission_completed_ += fences_awaited;

-  // Reclaim semaphores used for sparse binding and graphics synchronization.
-  while (!submissions_in_flight_sparse_binding_semaphores_.empty()) {
+  // Reclaim semaphores.
+  while (!submissions_in_flight_semaphores_.empty()) {
     const auto& semaphore_submission =
-        submissions_in_flight_sparse_binding_semaphores_.front();
+        submissions_in_flight_semaphores_.front();
     if (semaphore_submission.second > submission_completed_) {
       break;
     }
     semaphores_free_.push_back(semaphore_submission.first);
-    submissions_in_flight_sparse_binding_semaphores_.pop_front();
+    submissions_in_flight_semaphores_.pop_front();
   }

   // Reclaim command pools.
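The reclaim loop above is the standard recycle-by-submission-index scheme: each semaphore rides in a deque tagged with the submission that signals it, and moves back to a free list once the fence wait has advanced submission_completed_ past that index. A condensed sketch of the idea, with hypothetical names:

#include <cstdint>
#include <deque>
#include <utility>
#include <vector>
#include <vulkan/vulkan.h>

std::deque<std::pair<VkSemaphore, uint64_t>> g_in_flight;  // (semaphore, submission)
std::vector<VkSemaphore> g_free_semaphores;

void ReclaimSemaphores(uint64_t completed_submission) {
  // The deque is ordered by submission number, so stop at the first
  // still-pending entry.
  while (!g_in_flight.empty() &&
         g_in_flight.front().second <= completed_submission) {
    g_free_semaphores.push_back(g_in_flight.front().first);
    g_in_flight.pop_front();
  }
}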
@@ -322,14 +346,26 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       VkFence fence;
       if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
           VK_SUCCESS) {
-        XELOGE("Failed to create a Vulkan submission fence");
+        XELOGE("Failed to create a Vulkan fence");
         // Try to submit later. Completely dropping the submission is not
         // permitted because resources would be left in an undefined state.
         return false;
       }
       fences_free_.push_back(fence);
     }
-    // TODO(Triang3l): Create a sparse binding semaphore.
+    if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) {
+      VkSemaphoreCreateInfo semaphore_create_info;
+      semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+      semaphore_create_info.pNext = nullptr;
+      semaphore_create_info.flags = 0;
+      VkSemaphore semaphore;
+      if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr,
+                                &semaphore) != VK_SUCCESS) {
+        XELOGE("Failed to create a Vulkan semaphore");
+        return false;
+      }
+      semaphores_free_.push_back(semaphore);
+    }
     if (command_buffers_writable_.empty()) {
       CommandBuffer command_buffer;
       VkCommandPoolCreateInfo command_pool_create_info;
@@ -366,6 +402,52 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
   if (submission_open_) {
     shared_memory_->EndSubmission();

+    // Submit sparse binds earlier, before executing the deferred command
+    // buffer, to reduce latency.
+    if (!sparse_memory_binds_.empty()) {
+      sparse_buffer_bind_infos_temp_.clear();
+      sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size());
+      for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) {
+        VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info =
+            sparse_buffer_bind_infos_temp_.emplace_back();
+        sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer;
+        sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count;
+        sparse_buffer_bind_info.pBinds =
+            sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset;
+      }
+      assert_false(semaphores_free_.empty());
+      VkSemaphore bind_sparse_semaphore = semaphores_free_.back();
+      VkBindSparseInfo bind_sparse_info;
+      bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+      bind_sparse_info.pNext = nullptr;
+      bind_sparse_info.waitSemaphoreCount = 0;
+      bind_sparse_info.pWaitSemaphores = nullptr;
+      bind_sparse_info.bufferBindCount =
+          uint32_t(sparse_buffer_bind_infos_temp_.size());
+      bind_sparse_info.pBufferBinds =
+          !sparse_buffer_bind_infos_temp_.empty()
+              ? sparse_buffer_bind_infos_temp_.data()
+              : nullptr;
+      bind_sparse_info.imageOpaqueBindCount = 0;
+      bind_sparse_info.pImageOpaqueBinds = nullptr;
+      bind_sparse_info.imageBindCount = 0;
+      bind_sparse_info.pImageBinds = 0;
+      bind_sparse_info.signalSemaphoreCount = 1;
+      bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore;
+      if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) !=
+          VK_SUCCESS) {
+        XELOGE("Failed to submit Vulkan sparse binds");
+        return false;
+      }
+      current_submission_wait_semaphores_.push_back(bind_sparse_semaphore);
+      semaphores_free_.pop_back();
+      current_submission_wait_stage_masks_.push_back(
+          sparse_bind_wait_stage_mask_);
+      sparse_bind_wait_stage_mask_ = 0;
+      sparse_buffer_binds_.clear();
+      sparse_memory_binds_.clear();
+    }
+
     assert_false(command_buffers_writable_.empty());
     CommandBuffer command_buffer = command_buffers_writable_.back();
     if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
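This is the flush half of the deferred-bind pattern: only now, when no more binds can be appended, are the stored offsets turned into pBinds pointers, and the whole batch goes into a single vkQueueBindSparse that signals a semaphore for the upcoming vkQueueSubmit to wait on. Continuing the hypothetical sketch from the SparseBindBuffer hunk:

void FlushSparseBinds(VkQueue sparse_queue, VkSemaphore signal_semaphore) {
  std::vector<VkSparseBufferMemoryBindInfo> infos;
  infos.reserve(g_buffer_binds.size());
  for (const PendingBufferBinds& b : g_buffer_binds) {
    // Offsets become pointers only here; g_binds no longer reallocates.
    infos.push_back({b.buffer, b.bind_count, g_binds.data() + b.bind_offset});
  }
  VkBindSparseInfo bind_sparse_info = {};
  bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
  bind_sparse_info.bufferBindCount = uint32_t(infos.size());
  bind_sparse_info.pBufferBinds = infos.empty() ? nullptr : infos.data();
  bind_sparse_info.signalSemaphoreCount = 1;
  bind_sparse_info.pSignalSemaphores = &signal_semaphore;
  vkQueueBindSparse(sparse_queue, 1, &bind_sparse_info, VK_NULL_HANDLE);
  g_buffer_binds.clear();
  g_binds.clear();
}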
@@ -385,18 +467,25 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       return false;
     }
     deferred_command_buffer_.Execute(command_buffer.buffer);
-    // TODO(Triang3l): Write deferred command buffer commands.
     if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
       XELOGE("Failed to end a Vulkan command buffer");
       return false;
     }
-    // TODO(Triang3l): Submit sparse binding.
     VkSubmitInfo submit_info;
     submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submit_info.pNext = nullptr;
-    submit_info.waitSemaphoreCount = 0;
-    submit_info.pWaitSemaphores = nullptr;
-    submit_info.pWaitDstStageMask = nullptr;
+    if (!current_submission_wait_semaphores_.empty()) {
+      submit_info.waitSemaphoreCount =
+          uint32_t(current_submission_wait_semaphores_.size());
+      submit_info.pWaitSemaphores = current_submission_wait_semaphores_.data();
+      submit_info.pWaitDstStageMask =
+          current_submission_wait_stage_masks_.data();
+    } else {
+      submit_info.waitSemaphoreCount = 0;
+      submit_info.pWaitSemaphores = nullptr;
+      submit_info.pWaitDstStageMask = nullptr;
+    }
     submit_info.commandBufferCount = 1;
     submit_info.pCommandBuffers = &command_buffer.buffer;
     submit_info.signalSemaphoreCount = 0;
@@ -412,8 +501,14 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
       XELOGE("Failed to submit a Vulkan command buffer");
       return false;
     }
-    command_buffers_submitted_.push_back(
-        std::make_pair(command_buffer, GetCurrentSubmission()));
+    uint64_t submission_current = GetCurrentSubmission();
+    current_submission_wait_stage_masks_.clear();
+    for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
+      submissions_in_flight_semaphores_.emplace_back(semaphore,
+                                                     submission_current);
+    }
+    current_submission_wait_semaphores_.clear();
+    command_buffers_submitted_.emplace_back(command_buffer, submission_current);
     command_buffers_writable_.pop_back();
     // Increments the current submission number, going to the next submission.
     submissions_in_flight_fences_.push_back(fence);
@@ -54,6 +54,16 @@ class VulkanCommandProcessor : public CommandProcessor {
   }
   uint64_t GetCompletedSubmission() const { return submission_completed_; }

+  // Sparse binds are:
+  // - In a single submission, all submitted in one vkQueueBindSparse.
+  // - Sent to the queue without waiting for a semaphore.
+  // Thus, multiple sparse binds between the completed and the current
+  // submission, and within one submission, must not touch any overlapping
+  // memory regions.
+  void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
+                        const VkSparseMemoryBind* binds,
+                        VkPipelineStageFlags wait_stage_mask);
+
  protected:
   bool SetupContext() override;
   void ShutdownContext() override;
@@ -103,9 +113,13 @@ class VulkanCommandProcessor : public CommandProcessor {

   bool submission_open_ = false;
   uint64_t submission_completed_ = 0;
+  // In case vkQueueSubmit fails after something like a successful
+  // vkQueueBindSparse, to wait correctly on the next attempt.
+  std::vector<VkSemaphore> current_submission_wait_semaphores_;
+  std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
   std::vector<VkFence> submissions_in_flight_fences_;
   std::deque<std::pair<VkSemaphore, uint64_t>>
-      submissions_in_flight_sparse_binding_semaphores_;
+      submissions_in_flight_semaphores_;

   static constexpr uint32_t kMaxFramesInFlight = 3;
   bool frame_open_ = false;
@@ -124,6 +138,19 @@ class VulkanCommandProcessor : public CommandProcessor {
   std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
   DeferredCommandBuffer deferred_command_buffer_;

+  std::vector<VkSparseMemoryBind> sparse_memory_binds_;
+  struct SparseBufferBind {
+    VkBuffer buffer;
+    size_t bind_offset;
+    uint32_t bind_count;
+  };
+  std::vector<SparseBufferBind> sparse_buffer_binds_;
+  // SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer
+  // on submission (because pBinds should point to a place in std::vector, but
+  // it may be reallocated).
+  std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
+  VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
+
   std::unique_ptr<VulkanSharedMemory> shared_memory_;
 };
@@ -15,12 +15,20 @@
 #include <vector>

 #include "xenia/base/assert.h"
+#include "xenia/base/cvar.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/gpu/vulkan/deferred_command_buffer.h"
 #include "xenia/gpu/vulkan/vulkan_command_processor.h"
 #include "xenia/ui/vulkan/vulkan_util.h"

+DEFINE_bool(vulkan_sparse_shared_memory, true,
+            "Enable sparse binding for shared memory emulation. Disabling it "
+            "increases video memory usage - a 512 MB buffer is created - but "
+            "allows graphics debuggers that don't support sparse binding to "
+            "work.",
+            "Vulkan");
+
 namespace xe {
 namespace gpu {
 namespace vulkan {
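As a usage note, the new flag can be flipped at launch like other Xenia cvars (assuming the usual --name=value command-line syntax), which is handy when attaching a graphics debugger that lacks sparse-binding support:

xenia.exe --vulkan_sparse_shared_memory=false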
@@ -43,14 +51,15 @@ bool VulkanSharedMemory::Initialize() {
   VkDevice device = provider.device();
   const VkPhysicalDeviceFeatures& device_features = provider.device_features();

-  VkBufferCreateInfo buffer_create_info;
-  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-  buffer_create_info.pNext = nullptr;
-  buffer_create_info.flags = 0;
   const VkBufferCreateFlags sparse_flags =
       VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
       VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
-  // TODO(Triang3l): Sparse binding.
+
+  // Try to create a sparse buffer.
+  VkBufferCreateInfo buffer_create_info;
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = nullptr;
+  buffer_create_info.flags = sparse_flags;
   buffer_create_info.size = kBufferSize;
   buffer_create_info.usage =
       VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -58,26 +67,67 @@ bool VulkanSharedMemory::Initialize() {
   buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
   buffer_create_info.queueFamilyIndexCount = 0;
   buffer_create_info.pQueueFamilyIndices = nullptr;
-  VkResult buffer_create_result =
-      dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
-  if (buffer_create_result != VK_SUCCESS) {
-    if (buffer_create_info.flags & sparse_flags) {
-      buffer_create_info.flags &= ~sparse_flags;
-      buffer_create_result =
-          dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
+  if (cvars::vulkan_sparse_shared_memory &&
+      provider.IsSparseBindingSupported() &&
+      device_features.sparseResidencyBuffer) {
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
+        VK_SUCCESS) {
+      VkMemoryRequirements buffer_memory_requirements;
+      dfn.vkGetBufferMemoryRequirements(device, buffer_,
+                                        &buffer_memory_requirements);
+      if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
+                                   provider.memory_types_device_local(),
+                               &buffer_memory_type_)) {
+        uint32_t allocation_size_log2;
+        xe::bit_scan_forward(
+            std::max(uint64_t(buffer_memory_requirements.alignment),
+                     uint64_t(1)),
+            &allocation_size_log2);
+        if (allocation_size_log2 < kBufferSizeLog2) {
+          // Maximum of 1024 allocations in the worst case for all of the
+          // buffer because of the overall 4096 allocation count limit on
+          // Windows drivers.
+          InitializeSparseHostGpuMemory(
+              std::max(allocation_size_log2,
+                       std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
+                                kBufferSizeLog2 - uint32_t(10))));
+        } else {
+          // Shouldn't happen on any real platform, but no point allocating the
+          // buffer sparsely.
+          dfn.vkDestroyBuffer(device, buffer_, nullptr);
+          buffer_ = VK_NULL_HANDLE;
+        }
+      } else {
+        XELOGE(
+            "Shared memory: Failed to get a device-local Vulkan memory type "
+            "for the sparse buffer");
+        dfn.vkDestroyBuffer(device, buffer_, nullptr);
+        buffer_ = VK_NULL_HANDLE;
+      }
+    } else {
+      XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer",
+             kBufferSize >> 20);
     }
-    if (buffer_create_result != VK_SUCCESS) {
+  }
+
+  // Create a non-sparse buffer if there were issues with the sparse buffer.
+  if (buffer_ == VK_NULL_HANDLE) {
+    XELOGGPU(
+        "Vulkan sparse binding is not used for shared memory emulation - video "
+        "memory usage may increase significantly because a full {} MB buffer "
+        "will be created",
+        kBufferSize >> 20);
+    buffer_create_info.flags &= ~sparse_flags;
+    if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
+        VK_SUCCESS) {
       XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
              kBufferSize >> 20);
       Shutdown();
       return false;
     }
   }
-  }
   VkMemoryRequirements buffer_memory_requirements;
   dfn.vkGetBufferMemoryRequirements(device, buffer_,
                                     &buffer_memory_requirements);
-  // TODO(Triang3l): Determine sparse binding properties from memory
-  // requirements.
   if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
                             provider.memory_types_device_local(),
                             &buffer_memory_type_)) {
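The kBufferSizeLog2 - uint32_t(10) clamp is what enforces the "1024 allocations" bound mentioned in the comment: with a 512 MB buffer (kBufferSizeLog2 == 29), every allocation is at least 2^19 = 512 KB, so the whole buffer can never need more than 2^10 = 1024 of the roughly 4096 memory allocations Windows display drivers allow. A small self-contained check of the arithmetic; the value of kHostGpuMemoryOptimalSparseAllocationLog2 is assumed here to be 22 (4 MB) for illustration, the real constant lives elsewhere in the tree:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t kBufferSizeLog2 = 29;  // 512 MB.
  const uint32_t kHostGpuMemoryOptimalSparseAllocationLog2 = 22;  // Assumed.
  uint32_t alignment_log2 = 16;  // Example device sparse block alignment (64 KB).
  uint32_t allocation_log2 =
      std::max(alignment_log2,
               std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
                        kBufferSizeLog2 - uint32_t(10)));
  // Here: max(16, max(22, 29 - 10)) == 22, i.e. 4 MB allocations, and the
  // worst case for the whole buffer is 2^(29 - 22) = 128 allocations.
  std::printf("allocation size: %u KB, worst case %u allocations\n",
              1u << (allocation_log2 - 10),
              1u << (kBufferSizeLog2 - allocation_log2));
  return 0;
}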
@@ -87,10 +137,20 @@ bool VulkanSharedMemory::Initialize() {
     Shutdown();
     return false;
   }
-  if (!(buffer_create_info.flags & sparse_flags)) {
   VkMemoryAllocateInfo buffer_memory_allocate_info;
   buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
+  if (provider.device_extensions().khr_dedicated_allocation) {
+    buffer_memory_dedicated_allocate_info.sType =
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
+    buffer_memory_dedicated_allocate_info.pNext = nullptr;
+    buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
+    buffer_memory_dedicated_allocate_info.buffer = buffer_;
+    buffer_memory_allocate_info.pNext =
+        &buffer_memory_dedicated_allocate_info;
+  } else {
     buffer_memory_allocate_info.pNext = nullptr;
+  }
   buffer_memory_allocate_info.allocationSize =
       buffer_memory_requirements.size;
   buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
@@ -133,8 +193,6 @@ void VulkanSharedMemory::Shutdown(bool from_destructor) {
   VkDevice device = provider.device();

   ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);

-  buffer_memory_allocated_.clear();
   for (VkDeviceMemory memory : buffer_memory_) {
     dfn.vkFreeMemory(device, memory, nullptr);
   }
@@ -188,6 +246,51 @@ void VulkanSharedMemory::Use(Usage usage,
   last_written_range_ = written_range;
 }

+bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
+    uint32_t offset_allocations, uint32_t length_allocations) {
+  if (!length_allocations) {
+    return true;
+  }
+
+  const ui::vulkan::VulkanProvider& provider =
+      command_processor_.GetVulkanContext().GetVulkanProvider();
+  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
+  VkDevice device = provider.device();
+
+  VkMemoryAllocateInfo memory_allocate_info;
+  memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  memory_allocate_info.pNext = nullptr;
+  memory_allocate_info.allocationSize =
+      length_allocations << host_gpu_memory_sparse_granularity_log2();
+  memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
+  VkDeviceMemory memory;
+  if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
+      VK_SUCCESS) {
+    XELOGE("Shared memory: Failed to allocate sparse buffer memory");
+    return false;
+  }
+  buffer_memory_.push_back(memory);
+
+  VkSparseMemoryBind bind;
+  bind.resourceOffset = offset_allocations
+                        << host_gpu_memory_sparse_granularity_log2();
+  bind.size = memory_allocate_info.allocationSize;
+  bind.memory = memory;
+  bind.memoryOffset = 0;
+  bind.flags = 0;
+  VkPipelineStageFlags bind_wait_stage_mask =
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
+  if (provider.device_features().tessellationShader) {
+    bind_wait_stage_mask |=
+        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
+  }
+  command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
+
+  return true;
+}
+
 bool VulkanSharedMemory::UploadRanges(
     const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
   if (upload_page_ranges.empty()) {
@@ -54,14 +54,13 @@ class VulkanSharedMemory : public SharedMemory {
   VkBuffer buffer() const { return buffer_; }

  protected:
+  bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
+                                        uint32_t length_allocations) override;
+
   bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
                         upload_page_ranges) override;

  private:
-  bool IsSparse() const {
-    return buffer_allocation_size_log2_ < kBufferSizeLog2;
-  }
-
   void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask,
                   VkAccessFlags& access_mask) const;
@@ -70,16 +69,8 @@ class VulkanSharedMemory : public SharedMemory {

   VkBuffer buffer_ = VK_NULL_HANDLE;
   uint32_t buffer_memory_type_;
-  // Maximum of 1024 allocations in the worst case for all of the buffer because
-  // of the overall 4096 allocation count limit on Windows drivers.
-  static constexpr uint32_t kMinBufferAllocationSizeLog2 =
-      std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
-               kBufferSizeLog2 - uint32_t(10));
-  uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2;
-  // Sparse memory allocations, of different sizes.
+  // Single for non-sparse, every allocation so far for sparse.
   std::vector<VkDeviceMemory> buffer_memory_;
-  // One bit per every 2^buffer_allocation_size_log2_ of the buffer.
-  std::vector<uint64_t> buffer_memory_allocated_;

   // First usage will likely be uploading.
   Usage last_usage_ = Usage::kTransferDestination;
@@ -30,7 +30,7 @@ DEFINE_bool(
     vulkan_validation, true,
     "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be "
    "written to the OS debug log.",
-    "GPU");
+    "Vulkan");
 DEFINE_int32(
     vulkan_device, -1,
     "Index of the physical device to use, or -1 for any compatible device.",
@@ -587,6 +587,7 @@ bool VulkanProvider::Initialize() {
   XE_VULKAN_LOAD_DFN(vkMapMemory);
   XE_VULKAN_LOAD_DFN(vkResetCommandPool);
   XE_VULKAN_LOAD_DFN(vkResetFences);
+  XE_VULKAN_LOAD_DFN(vkQueueBindSparse);
   XE_VULKAN_LOAD_DFN(vkQueuePresentKHR);
   XE_VULKAN_LOAD_DFN(vkQueueSubmit);
   XE_VULKAN_LOAD_DFN(vkUnmapMemory);
@@ -190,6 +190,7 @@ class VulkanProvider : public GraphicsProvider {
     PFN_vkMapMemory vkMapMemory;
     PFN_vkResetCommandPool vkResetCommandPool;
     PFN_vkResetFences vkResetFences;
+    PFN_vkQueueBindSparse vkQueueBindSparse;
     PFN_vkQueuePresentKHR vkQueuePresentKHR;
     PFN_vkQueueSubmit vkQueueSubmit;
     PFN_vkUnmapMemory vkUnmapMemory;
@@ -205,9 +206,21 @@ class VulkanProvider : public GraphicsProvider {
     return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits,
                               fence);
   }
-  bool CanSubmitSparseBindings() const {
+  // Safer in Xenia context - in case a sparse binding queue was not obtained
+  // for some reason.
+  bool IsSparseBindingSupported() const {
     return queue_sparse_binding_ != VK_NULL_HANDLE;
   }
+  VkResult BindSparse(uint32_t bind_info_count,
+                      const VkBindSparseInfo* bind_info, VkFence fence) {
+    assert_true(IsSparseBindingSupported());
+    std::mutex& mutex = queue_sparse_binding_ == queue_graphics_compute_
+                            ? queue_graphics_compute_mutex_
+                            : queue_sparse_binding_separate_mutex_;
+    std::lock_guard<std::mutex> lock(mutex);
+    return dfn_.vkQueueBindSparse(queue_sparse_binding_, bind_info_count,
+                                  bind_info, fence);
+  }
   VkResult Present(const VkPresentInfoKHR* present_info) {
     // FIXME(Triang3l): Allow a separate queue for present - see
     // vulkan_provider.cc for details.
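One design choice in BindSparse deserves a note: vkQueueBindSparse requires the same external synchronization of the VkQueue as vkQueueSubmit, and the sparse-binding queue may be the very same queue object as the graphics/compute one, so the wrapper has to pick its lock accordingly. A standalone sketch of that selection logic (hypothetical names, plain pointers standing in for VkQueue handles):

#include <mutex>

struct QueueGuards {
  void* graphics_compute_queue;
  void* sparse_binding_queue;
  std::mutex graphics_compute_mutex;
  std::mutex sparse_binding_separate_mutex;

  std::mutex& SparseBindingMutex() {
    // Same underlying queue -> same lock; otherwise a dedicated lock is
    // enough to serialize sparse binds against each other.
    return sparse_binding_queue == graphics_compute_queue
               ? graphics_compute_mutex
               : sparse_binding_separate_mutex;
  }
};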