[Vulkan] Submissions

Triang3l 2020-10-01 21:17:10 +03:00
parent 0fcf322565
commit 865f77bae2
5 changed files with 441 additions and 16 deletions


@@ -9,6 +9,16 @@
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include <cstdint>
#include <iterator>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace gpu {
namespace vulkan {
@@ -24,16 +34,79 @@ void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {}
bool VulkanCommandProcessor::SetupContext() {
return CommandProcessor::SetupContext();
if (!CommandProcessor::SetupContext()) {
XELOGE("Failed to initialize base command processor context");
return false;
}
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
return true;
}
void VulkanCommandProcessor::ShutdownContext() {
return CommandProcessor::ShutdownContext();
AwaitAllQueueOperationsCompletion();
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
for (const auto& command_buffer_pair : command_buffers_submitted_) {
dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr);
}
command_buffers_submitted_.clear();
for (const CommandBuffer& command_buffer : command_buffers_writable_) {
dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
}
command_buffers_writable_.clear();
std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));
frame_completed_ = 0;
frame_current_ = 1;
frame_open_ = false;
for (const auto& semaphore :
submissions_in_flight_sparse_binding_semaphores_) {
dfn.vkDestroySemaphore(device, semaphore.first, nullptr);
}
submissions_in_flight_sparse_binding_semaphores_.clear();
for (VkFence& fence : submissions_in_flight_fences_) {
dfn.vkDestroyFence(device, fence, nullptr);
}
submissions_in_flight_fences_.clear();
submission_completed_ = 0;
submission_open_ = false;
for (VkSemaphore semaphore : semaphores_free_) {
dfn.vkDestroySemaphore(device, semaphore, nullptr);
}
semaphores_free_.clear();
for (VkFence fence : fences_free_) {
dfn.vkDestroyFence(device, fence, nullptr);
}
fences_free_.clear();
CommandProcessor::ShutdownContext();
}
void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
uint32_t frontbuffer_width,
uint32_t frontbuffer_height) {}
uint32_t frontbuffer_height) {
// FIXME(Triang3l): frontbuffer_ptr is currently unreliable - in the trace
// player it's set to 0 - but it's not needed anyway since the fetch constant
// contains the address.
SCOPE_profile_cpu_f("gpu");
// In case the swap command is the only one in the frame.
BeginSubmission(true);
EndSubmission(true);
}
Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address,
@@ -46,15 +119,282 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info,
bool major_mode_explicit) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
BeginSubmission(true);
return true;
}
bool VulkanCommandProcessor::IssueCopy() { return true; }
bool VulkanCommandProcessor::IssueCopy() {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
BeginSubmission(true);
return true;
}
void VulkanCommandProcessor::InitializeTrace() {}
void VulkanCommandProcessor::FinalizeTrace() {}
void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
if (await_submission >= GetCurrentSubmission()) {
if (submission_open_) {
EndSubmission(false);
}
// A submission won't be ended if it hasn't been started, or if ending
// has failed - clamp the index.
await_submission = GetCurrentSubmission() - 1;
}
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
size_t fences_total = submissions_in_flight_fences_.size();
size_t fences_awaited = 0;
if (await_submission > submission_completed_) {
// Await in a blocking way if requested.
if (dfn.vkWaitForFences(device,
uint32_t(await_submission - submission_completed_),
submissions_in_flight_fences_.data(), VK_TRUE,
UINT64_MAX) == VK_SUCCESS) {
fences_awaited += await_submission - submission_completed_;
} else {
XELOGE("Failed to await submission completion Vulkan fences");
}
}
// Check how far into the submissions the GPU currently is, in order, because
// submissions themselves can be executed out of order, but Xenia serializes
// that for simplicity.
while (fences_awaited < fences_total) {
if (dfn.vkWaitForFences(device, 1,
&submissions_in_flight_fences_[fences_awaited],
VK_TRUE, 0) != VK_SUCCESS) {
break;
}
++fences_awaited;
}
if (!fences_awaited) {
// Not updated - no need to reclaim or download things.
return;
}
// Reclaim fences.
fences_free_.reserve(fences_free_.size() + fences_awaited);
auto submissions_in_flight_fences_awaited_end =
submissions_in_flight_fences_.cbegin();
std::advance(submissions_in_flight_fences_awaited_end, fences_awaited);
fences_free_.insert(fences_free_.cend(),
submissions_in_flight_fences_.cbegin(),
submissions_in_flight_fences_awaited_end);
submissions_in_flight_fences_.erase(submissions_in_flight_fences_.cbegin(),
submissions_in_flight_fences_awaited_end);
submission_completed_ += fences_awaited;
// Reclaim semaphores used for sparse binding and graphics synchronization.
while (!submissions_in_flight_sparse_binding_semaphores_.empty()) {
const auto& semaphore_submission =
submissions_in_flight_sparse_binding_semaphores_.front();
if (semaphore_submission.second > submission_completed_) {
break;
}
semaphores_free_.push_back(semaphore_submission.first);
submissions_in_flight_sparse_binding_semaphores_.pop_front();
}
// Reclaim command pools.
while (!command_buffers_submitted_.empty()) {
const auto& command_buffer_pair = command_buffers_submitted_.front();
if (command_buffer_pair.second > submission_completed_) {
break;
}
command_buffers_writable_.push_back(command_buffer_pair.first);
command_buffers_submitted_.pop_front();
}
}
void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
bool is_opening_frame = is_guest_command && !frame_open_;
if (submission_open_ && !is_opening_frame) {
return;
}
// Check the fence - needed for all kinds of submissions (to reclaim transient
// resources early) and specifically for frames (not to queue too many), and
// await the availability of the current frame.
CheckSubmissionFence(
is_opening_frame
? closed_frame_submissions_[frame_current_ % kMaxFramesInFlight]
: 0);
// TODO(Triang3l): If failed to await (completed submission < awaited frame
// submission), do something like dropping the draw command that wanted to
// open the frame.
if (is_opening_frame) {
// Update the completed frame index, also obtaining the actual completed
// frame number (since the CPU may actually be fewer than 3 frames behind)
// before reclaiming resources tracked with the frame number.
frame_completed_ = std::max(frame_current_, uint64_t(kMaxFramesInFlight)) -
kMaxFramesInFlight;
for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
++frame) {
if (closed_frame_submissions_[frame % kMaxFramesInFlight] >
submission_completed_) {
break;
}
frame_completed_ = frame;
}
}
if (!submission_open_) {
submission_open_ = true;
}
if (is_opening_frame) {
frame_open_ = true;
}
}
bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
// Make sure everything needed for submitting exists.
if (submission_open_) {
if (fences_free_.empty()) {
VkFenceCreateInfo fence_create_info;
fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fence_create_info.pNext = nullptr;
fence_create_info.flags = 0;
VkFence fence;
if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
VK_SUCCESS) {
XELOGE("Failed to create a Vulkan submission fence");
// Try to submit later. Completely dropping the submission is not
// permitted because resources would be left in an undefined state.
return false;
}
fences_free_.push_back(fence);
}
// TODO(Triang3l): Create a sparse binding semaphore.
if (command_buffers_writable_.empty()) {
CommandBuffer command_buffer;
VkCommandPoolCreateInfo command_pool_create_info;
command_pool_create_info.sType =
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
command_pool_create_info.pNext = nullptr;
command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
command_pool_create_info.queueFamilyIndex =
provider.queue_family_graphics_compute();
if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr,
&command_buffer.pool) != VK_SUCCESS) {
XELOGE("Failed to create a Vulkan command pool");
return false;
}
VkCommandBufferAllocateInfo command_buffer_allocate_info;
command_buffer_allocate_info.sType =
VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
command_buffer_allocate_info.pNext = nullptr;
command_buffer_allocate_info.commandPool = command_buffer.pool;
command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
command_buffer_allocate_info.commandBufferCount = 1;
if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info,
&command_buffer.buffer) != VK_SUCCESS) {
XELOGE("Failed to allocate a Vulkan command buffer");
dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
return false;
}
command_buffers_writable_.push_back(command_buffer);
}
}
bool is_closing_frame = is_swap && frame_open_;
if (submission_open_) {
assert_false(command_buffers_writable_.empty());
CommandBuffer command_buffer = command_buffers_writable_.back();
if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
XELOGE("Failed to reset a Vulkan command pool");
return false;
}
VkCommandBufferBeginInfo command_buffer_begin_info;
command_buffer_begin_info.sType =
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
command_buffer_begin_info.pNext = nullptr;
command_buffer_begin_info.flags =
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
command_buffer_begin_info.pInheritanceInfo = nullptr;
if (dfn.vkBeginCommandBuffer(command_buffer.buffer,
&command_buffer_begin_info) != VK_SUCCESS) {
XELOGE("Failed to begin a Vulkan command buffer");
return false;
}
// TODO(Triang3l): Write deferred command buffer commands.
if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
XELOGE("Failed to end a Vulkan command buffer");
return false;
}
// TODO(Triang3l): Submit sparse binding.
VkSubmitInfo submit_info;
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = nullptr;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = nullptr;
submit_info.pWaitDstStageMask = nullptr;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &command_buffer.buffer;
submit_info.signalSemaphoreCount = 0;
submit_info.pSignalSemaphores = nullptr;
assert_false(fences_free_.empty());
VkFence fence = fences_free_.back();
if (dfn.vkResetFences(device, 1, &fence) != VK_SUCCESS) {
XELOGE("Failed to reset a Vulkan submission fence");
return false;
}
if (provider.SubmitToGraphicsComputeQueue(1, &submit_info, fence) !=
VK_SUCCESS) {
XELOGE("Failed to submit a Vulkan command buffer");
return false;
}
command_buffers_submitted_.push_back(
std::make_pair(command_buffer, GetCurrentSubmission()));
command_buffers_writable_.pop_back();
// Pushing the fence increments the current submission number, moving on to
// the next submission.
submissions_in_flight_fences_.push_back(fence);
fences_free_.pop_back();
submission_open_ = false;
}
if (is_closing_frame) {
frame_open_ = false;
// The submission has already been closed above, hence the minus 1.
closed_frame_submissions_[(frame_current_++) % kMaxFramesInFlight] =
GetCurrentSubmission() - 1;
if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
cache_clear_requested_ = false;
for (const CommandBuffer& command_buffer : command_buffers_writable_) {
dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
}
command_buffers_writable_.clear();
}
}
return true;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe
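The machinery above maintains one invariant: the index of the submission currently being recorded equals the number of completed submissions, plus the number of fences still in flight, plus one (see GetCurrentSubmission() in the header below). A minimal standalone sketch of that counter model - SubmissionTracker and its members are illustrative, not the commit's API:

#include <cassert>
#include <cstdint>
#include <deque>

// Illustrative model of the submission counters the command processor keeps.
struct SubmissionTracker {
  uint64_t completed = 0;          // Highest submission known to be finished.
  std::deque<uint64_t> in_flight;  // Submissions with unsignaled fences, oldest first.

  // Mirrors VulkanCommandProcessor::GetCurrentSubmission().
  uint64_t Current() const { return completed + in_flight.size() + 1; }

  // EndSubmission: the open submission's fence joins the in-flight queue,
  // which implicitly advances Current() to the next index.
  void Submit() { in_flight.push_back(Current()); }

  // CheckSubmissionFence: fences are awaited in submission order, so they are
  // reclaimed from the front and the completed counter only moves forward.
  void Signal(uint64_t up_to) {
    while (!in_flight.empty() && in_flight.front() <= up_to) {
      in_flight.pop_front();
      ++completed;
    }
  }
};

int main() {
  SubmissionTracker tracker;
  assert(tracker.Current() == 1);
  tracker.Submit();               // Submission 1 goes in flight.
  assert(tracker.Current() == 2);
  tracker.Signal(1);              // The GPU finishes submission 1.
  assert(tracker.completed == 1 && tracker.Current() == 2);
  return 0;
}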


@@ -10,10 +10,16 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#include <cstdint>
#include <deque>
#include <utility>
#include <vector>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/vulkan_context.h"
namespace xe {
namespace gpu {
@@ -29,7 +35,17 @@ class VulkanCommandProcessor : public CommandProcessor {
void RestoreEdramSnapshot(const void* snapshot) override;
private:
ui::vulkan::VulkanContext& GetVulkanContext() const {
return static_cast<ui::vulkan::VulkanContext&>(*context_);
}
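// Index of the currently open (or next) submission: all completed
// submissions, plus those still in flight on the GPU, plus one.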
uint64_t GetCurrentSubmission() const {
return submission_completed_ +
uint64_t(submissions_in_flight_fences_.size()) + 1;
}
uint64_t GetCompletedSubmission() const { return submission_completed_; }
protected:
bool SetupContext() override;
void ShutdownContext() override;
@@ -47,6 +63,56 @@ class VulkanCommandProcessor : public CommandProcessor {
void InitializeTrace() override;
void FinalizeTrace() override;
private:
// BeginSubmission and EndSubmission may be called at any time. If there's an
// open non-frame submission, BeginSubmission(true) will promote it to a
// frame. EndSubmission(true) will close the frame no matter whether the
// submission has already been closed.
// Rechecks submission number and reclaims per-submission resources. Pass 0 as
// the submission to await to simply check status, or pass
// GetCurrentSubmission() to wait for all queue operations to be completed.
void CheckSubmissionFence(uint64_t await_submission);
// If is_guest_command is true, a new full frame - with full cleanup of
// resources and, if needed, starting capturing - is opened if pending (as
// opposed to simply resuming after mid-frame synchronization).
void BeginSubmission(bool is_guest_command);
// If is_swap is true, a full frame is closed - with, if needed, cache
// clearing and stopping capturing. Returns whether the submission was done
// successfully; if it has failed, the submission is left open.
bool EndSubmission(bool is_swap);
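// Ends the current submission if one is open and waits for every in-flight
// submission to complete; returns true only if nothing remains open or
// pending afterwards.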
bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(GetCurrentSubmission());
return !submission_open_ && submissions_in_flight_fences_.empty();
}
bool cache_clear_requested_ = false;
std::vector<VkFence> fences_free_;
std::vector<VkSemaphore> semaphores_free_;
bool submission_open_ = false;
uint64_t submission_completed_ = 0;
std::vector<VkFence> submissions_in_flight_fences_;
std::deque<std::pair<VkSemaphore, uint64_t>>
submissions_in_flight_sparse_binding_semaphores_;
static constexpr uint32_t kMaxFramesInFlight = 3;
bool frame_open_ = false;
// Guest frame index, since some transient resources can be reused across
// submissions. Updated at the beginning of a frame.
uint64_t frame_current_ = 1;
uint64_t frame_completed_ = 0;
// Submission indices of frames that have already been submitted.
uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
struct CommandBuffer {
VkCommandPool pool;
VkCommandBuffer buffer;
};
std::vector<CommandBuffer> command_buffers_writable_;
std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
};
} // namespace vulkan
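The frame ring declared above works like a classic fences-per-frame wait: before opening frame N, BeginSubmission waits for the submission that closed frame N - kMaxFramesInFlight, so at most kMaxFramesInFlight frames are ever queued ahead of the GPU. A condensed sketch of that slot reuse, using illustrative free functions rather than the commit's members:

#include <cstdint>

constexpr uint32_t kMaxFramesInFlight = 3;

// Submission indices that closed the last kMaxFramesInFlight frames.
uint64_t closed_frame_submissions[kMaxFramesInFlight] = {};
uint64_t frame_current = 1;

// BeginSubmission(true): slot frame_current % kMaxFramesInFlight still holds
// the submission that closed frame (frame_current - kMaxFramesInFlight), so
// waiting on it bounds the number of frames in flight.
uint64_t SubmissionToAwaitBeforeOpeningFrame() {
  return closed_frame_submissions[frame_current % kMaxFramesInFlight];
}

// EndSubmission(true): remember which submission closed this frame, then move
// on to the next frame index.
void CloseFrame(uint64_t closing_submission) {
  closed_frame_submissions[(frame_current++) % kMaxFramesInFlight] =
      closing_submission;
}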


@@ -737,10 +737,9 @@ void VulkanContext::EndSwap() {
return;
}
const VulkanProvider& provider = GetVulkanProvider();
VulkanProvider& provider = GetVulkanProvider();
const VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkQueue queue_graphics_compute = provider.queue_graphics_compute();
const SwapSubmission& submission =
swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount];
@@ -771,8 +770,8 @@
submit_info.pCommandBuffers = submit_command_buffers;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &swap_render_completion_semaphore_;
VkResult submit_result = dfn.vkQueueSubmit(queue_graphics_compute, 1,
&submit_info, submission.fence);
VkResult submit_result =
provider.SubmitToGraphicsComputeQueue(1, &submit_info, submission.fence);
if (submit_result != VK_SUCCESS) {
// If failed, can't even return the swapchain image - so treat all errors as
// context loss.
@@ -790,10 +789,7 @@
present_info.pSwapchains = &swap_swapchain_;
present_info.pImageIndices = &swap_swapchain_image_current_;
present_info.pResults = nullptr;
// FIXME(Triang3l): Allow a separate queue for present - see
// vulkan_provider.cc for details.
VkResult present_result =
dfn.vkQueuePresentKHR(queue_graphics_compute, &present_info);
VkResult present_result = provider.Present(&present_info);
swap_swapchain_image_current_ = UINT32_MAX;
switch (present_result) {
case VK_SUCCESS:


@@ -19,6 +19,8 @@
#include "xenia/ui/vulkan/vulkan_immediate_drawer.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#define FINE_GRAINED_DRAW_SCOPES 1
namespace xe {
namespace ui {
namespace vulkan {


@@ -12,8 +12,10 @@
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/base/platform.h"
#include "xenia/ui/graphics_provider.h"
@@ -193,9 +195,22 @@ class VulkanProvider : public GraphicsProvider {
};
const DeviceFunctions& dfn() const { return dfn_; }
VkQueue queue_graphics_compute() const { return queue_graphics_compute_; }
// May be VK_NULL_HANDLE if not available.
VkQueue queue_sparse_binding() const { return queue_sparse_binding_; }
VkResult SubmitToGraphicsComputeQueue(uint32_t submit_count,
const VkSubmitInfo* submits,
VkFence fence) {
std::lock_guard<std::mutex> lock(queue_graphics_compute_mutex_);
return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits,
fence);
}
bool CanSubmitSparseBindings() const {
return queue_sparse_binding_ != VK_NULL_HANDLE;
}
VkResult Present(const VkPresentInfoKHR* present_info) {
// FIXME(Triang3l): Allow a separate queue for present - see
// vulkan_provider.cc for details.
std::lock_guard<std::mutex> lock(queue_graphics_compute_mutex_);
return dfn_.vkQueuePresentKHR(queue_graphics_compute_, present_info);
}
// Samplers that may be useful for host needs. Only these samplers should be
// used in host, non-emulation contexts, because the total number of samplers
@@ -242,8 +257,14 @@
VkDevice device_ = VK_NULL_HANDLE;
DeviceFunctions dfn_ = {};
VkQueue queue_graphics_compute_;
// VkQueue access must be externally synchronized - must be locked when
// submitting anything.
std::mutex queue_graphics_compute_mutex_;
// May be VK_NULL_HANDLE if not available.
VkQueue queue_sparse_binding_;
// If queue_sparse_binding_ == queue_graphics_compute_, lock
// queue_graphics_compute_mutex_ instead when submitting sparse bindings.
std::mutex queue_sparse_binding_separate_mutex_;
VkSampler host_samplers_[size_t(HostSampler::kCount)] = {};
};
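Since VkQueue access must be externally synchronized, routing every submit and present through these wrappers makes queue_graphics_compute_mutex_ the single synchronization point for the shared graphics/compute queue: the command processor's EndSubmission and VulkanContext::EndSwap can now both submit without ever touching the raw VkQueue. A hedged usage sketch - SubmitWork is an illustrative caller, not part of the commit, and the VkSubmitInfo and VkFence are assumed to be created and filled elsewhere:

#include "xenia/ui/vulkan/vulkan_provider.h"

// Illustrative call site; safe to run concurrently with the swap path because
// the provider serializes access to the queue internally.
VkResult SubmitWork(xe::ui::vulkan::VulkanProvider& provider,
                    const VkSubmitInfo& submit_info, VkFence fence) {
  return provider.SubmitToGraphicsComputeQueue(1, &submit_info, fence);
}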