[Vulkan] Pipeline barrier merging

This commit is contained in:
Triang3l 2022-03-21 23:02:51 +03:00
parent acc4fd6846
commit 1259c9f7a2
9 changed files with 400 additions and 215 deletions

View File

@ -772,41 +772,26 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
new_swap_framebuffer.last_submission = 0;
}
// End the current render pass before inserting barriers and starting a
// new one.
EndRenderPass();
if (vulkan_context.image_ever_written_previously()) {
// Insert a barrier after the last presenter's usage of the guest
// output image.
VkImageMemoryBarrier guest_output_image_acquire_barrier;
guest_output_image_acquire_barrier.sType =
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
guest_output_image_acquire_barrier.pNext = nullptr;
guest_output_image_acquire_barrier.srcAccessMask =
ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask;
guest_output_image_acquire_barrier.dstAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
// Will be overwriting all the contents.
guest_output_image_acquire_barrier.oldLayout =
VK_IMAGE_LAYOUT_UNDEFINED;
// The render pass will do the layout transition, but newLayout must
// not be UNDEFINED.
guest_output_image_acquire_barrier.newLayout =
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
guest_output_image_acquire_barrier.srcQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
guest_output_image_acquire_barrier.dstQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
guest_output_image_acquire_barrier.image = vulkan_context.image();
ui::vulkan::util::InitializeSubresourceRange(
guest_output_image_acquire_barrier.subresourceRange);
deferred_command_buffer_.CmdVkPipelineBarrier(
// output image. Will be overwriting all the contents, so oldLayout
// layout is UNDEFINED. The render pass will do the layout transition,
// but newLayout must not be UNDEFINED.
PushImageMemoryBarrier(
vulkan_context.image(),
ui::vulkan::util::InitializeSubresourceRange(),
ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0,
nullptr, 1, &guest_output_image_acquire_barrier);
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
}
// End the current render pass before inserting barriers and starting a
// new one, and insert the barrier.
SubmitBarriers(true);
SwapFramebuffer& swap_framebuffer =
swap_framebuffers_[swap_framebuffer_index];
swap_framebuffer.last_submission = GetCurrentSubmission();
@ -848,33 +833,20 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
deferred_command_buffer_.CmdVkEndRenderPass();
VkImageMemoryBarrier guest_output_image_release_barrier;
guest_output_image_release_barrier.sType =
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
guest_output_image_release_barrier.pNext = nullptr;
guest_output_image_release_barrier.srcAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
guest_output_image_release_barrier.dstAccessMask =
ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask;
guest_output_image_release_barrier.oldLayout =
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
guest_output_image_release_barrier.newLayout =
ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout;
guest_output_image_release_barrier.srcQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
guest_output_image_release_barrier.dstQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
guest_output_image_release_barrier.image = vulkan_context.image();
ui::vulkan::util::InitializeSubresourceRange(
guest_output_image_release_barrier.subresourceRange);
deferred_command_buffer_.CmdVkPipelineBarrier(
// Insert the release barrier.
PushImageMemoryBarrier(
vulkan_context.image(),
ui::vulkan::util::InitializeSubresourceRange(),
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, 0, 0,
nullptr, 0, nullptr, 1, &guest_output_image_release_barrier);
ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout);
// Need to submit all the commands before giving the image back to the
// presenter so it can submit its own commands for displaying it to the
// queue.
// queue, and also need to submit the release barrier.
EndSubmission(true);
return true;
});
@ -884,6 +856,215 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
EndSubmission(true);
}
// Queues a VkBufferMemoryBarrier into the current pending pipeline barrier
// batch (current_pending_barrier_ plus the shared barrier vectors), merging
// with already-pending barriers where possible. The batch is only recorded
// into the command buffer later, by SubmitBarriers.
// buffer/offset/size identify the range (size may be VK_WHOLE_SIZE);
// skip_if_equal drops the request entirely when source and destination
// stages, accesses and queue families are identical (a no-op barrier for a
// buffer, since buffers have no layout).
void VulkanCommandProcessor::PushBufferMemoryBarrier(
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
uint32_t src_queue_family_index, uint32_t dst_queue_family_index,
bool skip_if_equal) {
if (skip_if_equal && src_stage_mask == dst_stage_mask &&
src_access_mask == dst_access_mask &&
src_queue_family_index == dst_queue_family_index) {
return;
}
// Separate different barriers for overlapping buffer ranges into different
// pipeline barrier commands.
for (const VkBufferMemoryBarrier& other_buffer_memory_barrier :
pending_barriers_buffer_memory_barriers_) {
// Skip pending barriers that don't overlap the requested range.
// VK_WHOLE_SIZE on either side is treated as extending to the end of the
// buffer, so only the start offset can exclude overlap in that case.
if (other_buffer_memory_barrier.buffer != buffer ||
(size != VK_WHOLE_SIZE &&
offset + size <= other_buffer_memory_barrier.offset) ||
(other_buffer_memory_barrier.size != VK_WHOLE_SIZE &&
other_buffer_memory_barrier.offset +
other_buffer_memory_barrier.size <=
offset)) {
continue;
}
// Overlapping range found - if it's an exact duplicate (same range,
// accesses and queue families), just widen the batch's stage masks.
if (other_buffer_memory_barrier.offset == offset &&
other_buffer_memory_barrier.size == size &&
other_buffer_memory_barrier.srcAccessMask == src_access_mask &&
other_buffer_memory_barrier.dstAccessMask == dst_access_mask &&
other_buffer_memory_barrier.srcQueueFamilyIndex ==
src_queue_family_index &&
other_buffer_memory_barrier.dstQueueFamilyIndex ==
dst_queue_family_index) {
// The barrier is already present.
current_pending_barrier_.src_stage_mask |= src_stage_mask;
current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
return;
}
// Overlapping but different - close the current batch so the new barrier
// goes into a later vkCmdPipelineBarrier, ordered after the pending one.
SplitPendingBarrier();
break;
}
// Accumulate the stage masks for the whole batch and append the new
// buffer memory barrier to the shared vector; current_pending_barrier_
// references it via buffer_memory_barriers_offset.
current_pending_barrier_.src_stage_mask |= src_stage_mask;
current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
VkBufferMemoryBarrier& buffer_memory_barrier =
pending_barriers_buffer_memory_barriers_.emplace_back();
buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_memory_barrier.pNext = nullptr;
buffer_memory_barrier.srcAccessMask = src_access_mask;
buffer_memory_barrier.dstAccessMask = dst_access_mask;
buffer_memory_barrier.srcQueueFamilyIndex = src_queue_family_index;
buffer_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index;
buffer_memory_barrier.buffer = buffer;
buffer_memory_barrier.offset = offset;
buffer_memory_barrier.size = size;
}
// Queues a VkImageMemoryBarrier into the current pending pipeline barrier
// batch, merging with already-pending barriers where possible; recorded
// later by SubmitBarriers. Mirrors PushBufferMemoryBarrier, with the overlap
// test done on the image subresource range (aspects, mip levels, array
// layers) and the equality test additionally covering the layout transition.
// skip_if_equal drops a request whose source and destination stages,
// accesses, layouts and queue families are all identical (a no-op barrier).
void VulkanCommandProcessor::PushImageMemoryBarrier(
VkImage image, const VkImageSubresourceRange& subresource_range,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
VkImageLayout old_layout, VkImageLayout new_layout,
uint32_t src_queue_family_index, uint32_t dst_queue_family_index,
bool skip_if_equal) {
if (skip_if_equal && src_stage_mask == dst_stage_mask &&
src_access_mask == dst_access_mask && old_layout == new_layout &&
src_queue_family_index == dst_queue_family_index) {
return;
}
// Separate different barriers for overlapping image subresource ranges into
// different pipeline barrier commands.
for (const VkImageMemoryBarrier& other_image_memory_barrier :
pending_barriers_image_memory_barriers_) {
// Two subresource ranges overlap only if they share an aspect, and their
// mip level ranges and array layer ranges both intersect.
// VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS are treated as
// extending to the end, so only the base can exclude overlap then.
if (other_image_memory_barrier.image != image ||
!(other_image_memory_barrier.subresourceRange.aspectMask &
subresource_range.aspectMask) ||
(subresource_range.levelCount != VK_REMAINING_MIP_LEVELS &&
subresource_range.baseMipLevel + subresource_range.levelCount <=
other_image_memory_barrier.subresourceRange.baseMipLevel) ||
(other_image_memory_barrier.subresourceRange.levelCount !=
VK_REMAINING_MIP_LEVELS &&
other_image_memory_barrier.subresourceRange.baseMipLevel +
other_image_memory_barrier.subresourceRange.levelCount <=
subresource_range.baseMipLevel) ||
(subresource_range.layerCount != VK_REMAINING_ARRAY_LAYERS &&
subresource_range.baseArrayLayer + subresource_range.layerCount <=
other_image_memory_barrier.subresourceRange.baseArrayLayer) ||
(other_image_memory_barrier.subresourceRange.layerCount !=
VK_REMAINING_ARRAY_LAYERS &&
other_image_memory_barrier.subresourceRange.baseArrayLayer +
other_image_memory_barrier.subresourceRange.layerCount <=
subresource_range.baseArrayLayer)) {
continue;
}
// Overlapping range found - if it's an exact duplicate (same range,
// accesses, layouts and queue families), just widen the stage masks.
if (other_image_memory_barrier.subresourceRange.aspectMask ==
subresource_range.aspectMask &&
other_image_memory_barrier.subresourceRange.baseMipLevel ==
subresource_range.baseMipLevel &&
other_image_memory_barrier.subresourceRange.levelCount ==
subresource_range.levelCount &&
other_image_memory_barrier.subresourceRange.baseArrayLayer ==
subresource_range.baseArrayLayer &&
other_image_memory_barrier.subresourceRange.layerCount ==
subresource_range.layerCount &&
other_image_memory_barrier.srcAccessMask == src_access_mask &&
other_image_memory_barrier.dstAccessMask == dst_access_mask &&
other_image_memory_barrier.oldLayout == old_layout &&
other_image_memory_barrier.newLayout == new_layout &&
other_image_memory_barrier.srcQueueFamilyIndex ==
src_queue_family_index &&
other_image_memory_barrier.dstQueueFamilyIndex ==
dst_queue_family_index) {
// The barrier is already present.
current_pending_barrier_.src_stage_mask |= src_stage_mask;
current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
return;
}
// Overlapping but different - close the current batch so the new barrier
// goes into a later vkCmdPipelineBarrier, ordered after the pending one.
SplitPendingBarrier();
break;
}
// Accumulate the stage masks for the whole batch and append the new image
// memory barrier to the shared vector; current_pending_barrier_ references
// it via image_memory_barriers_offset.
current_pending_barrier_.src_stage_mask |= src_stage_mask;
current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
VkImageMemoryBarrier& image_memory_barrier =
pending_barriers_image_memory_barriers_.emplace_back();
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = src_access_mask;
image_memory_barrier.dstAccessMask = dst_access_mask;
image_memory_barrier.oldLayout = old_layout;
image_memory_barrier.newLayout = new_layout;
image_memory_barrier.srcQueueFamilyIndex = src_queue_family_index;
image_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index;
image_memory_barrier.image = image;
image_memory_barrier.subresourceRange = subresource_range;
}
// Records all pending barrier batches into the deferred command buffer, one
// vkCmdPipelineBarrier per batch, and resets the pending state. The render
// pass is ended before recording any barriers; if there are no barriers, it
// is ended only when force_end_render_pass is true. Returns whether any
// barriers were recorded (in which case the render pass is also closed).
// Requires an open submission.
bool VulkanCommandProcessor::SubmitBarriers(bool force_end_render_pass) {
assert_true(submission_open_);
// Close the batch currently being built so it's included in the loop below.
SplitPendingBarrier();
if (pending_barriers_.empty()) {
if (force_end_render_pass) {
EndRenderPass();
}
return false;
}
EndRenderPass();
for (auto it = pending_barriers_.cbegin(); it != pending_barriers_.cend();
++it) {
// Each batch owns the slice of the shared barrier vectors from its own
// offsets up to the next batch's offsets (or the vector ends for the
// last batch).
auto it_next = std::next(it);
bool is_last = it_next == pending_barriers_.cend();
// .data() + offset, not &[offset], for buffer and image barriers, because
// if there are no buffer or image memory barriers in the last pipeline
// barriers, the offsets may be equal to the sizes of the vectors.
deferred_command_buffer_.CmdVkPipelineBarrier(
// Stage masks must not be empty in vkCmdPipelineBarrier - substitute
// TOP_OF_PIPE / BOTTOM_OF_PIPE for zero masks.
it->src_stage_mask ? it->src_stage_mask
: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
it->dst_stage_mask ? it->dst_stage_mask
: VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
0, 0, nullptr,
uint32_t((is_last ? pending_barriers_buffer_memory_barriers_.size()
: it_next->buffer_memory_barriers_offset) -
it->buffer_memory_barriers_offset),
pending_barriers_buffer_memory_barriers_.data() +
it->buffer_memory_barriers_offset,
uint32_t((is_last ? pending_barriers_image_memory_barriers_.size()
: it_next->image_memory_barriers_offset) -
it->image_memory_barriers_offset),
pending_barriers_image_memory_barriers_.data() +
it->image_memory_barriers_offset);
}
// Everything recorded - reset the pending barrier state for reuse.
pending_barriers_.clear();
pending_barriers_buffer_memory_barriers_.clear();
pending_barriers_image_memory_barriers_.clear();
current_pending_barrier_.buffer_memory_barriers_offset = 0;
current_pending_barrier_.image_memory_barriers_offset = 0;
return true;
}
// Submits any pending pipeline barriers (which may end the current render
// pass), then ensures the given render-target-cache render pass is open with
// the given framebuffer, beginning it (and ending a different current one)
// if needed. Requires an open submission.
void VulkanCommandProcessor::SubmitBarriersAndEnterRenderTargetCacheRenderPass(
VkRenderPass render_pass,
const VulkanRenderTargetCache::Framebuffer* framebuffer) {
SubmitBarriers(false);
// Already in the requested render pass with the requested framebuffer -
// nothing to do (SubmitBarriers only ended it if barriers were recorded).
if (current_render_pass_ == render_pass &&
current_framebuffer_ == framebuffer) {
return;
}
if (current_render_pass_ != VK_NULL_HANDLE) {
deferred_command_buffer_.CmdVkEndRenderPass();
}
current_render_pass_ = render_pass;
current_framebuffer_ = framebuffer;
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = render_pass;
render_pass_begin_info.framebuffer = framebuffer->framebuffer;
render_pass_begin_info.renderArea.offset.x = 0;
render_pass_begin_info.renderArea.offset.y = 0;
// TODO(Triang3l): Actual dirty width / height in the deferred command
// buffer.
render_pass_begin_info.renderArea.extent = framebuffer->host_extent;
// Attachments are loaded with their previous contents, not cleared - no
// clear values needed.
render_pass_begin_info.clearValueCount = 0;
render_pass_begin_info.pClearValues = nullptr;
deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
VK_SUBPASS_CONTENTS_INLINE);
}
void VulkanCommandProcessor::EndRenderPass() {
assert_true(submission_open_);
if (current_render_pass_ == VK_NULL_HANDLE) {
@ -891,7 +1072,7 @@ void VulkanCommandProcessor::EndRenderPass() {
}
deferred_command_buffer_.CmdVkEndRenderPass();
current_render_pass_ = VK_NULL_HANDLE;
current_framebuffer_ = VK_NULL_HANDLE;
current_framebuffer_ = nullptr;
}
const VulkanPipelineCache::PipelineLayoutProvider*
@ -1324,33 +1505,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// TODO(Triang3l): Memory export.
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
// After all commands that may dispatch, copy or insert barriers, enter the
// render pass before drawing.
VkRenderPass render_pass = render_target_cache_->last_update_render_pass();
const VulkanRenderTargetCache::Framebuffer* framebuffer =
render_target_cache_->last_update_framebuffer();
if (current_render_pass_ != render_pass ||
current_framebuffer_ != framebuffer->framebuffer) {
if (current_render_pass_ != VK_NULL_HANDLE) {
deferred_command_buffer_.CmdVkEndRenderPass();
}
current_render_pass_ = render_pass;
current_framebuffer_ = framebuffer->framebuffer;
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = render_pass;
render_pass_begin_info.framebuffer = framebuffer->framebuffer;
render_pass_begin_info.renderArea.offset.x = 0;
render_pass_begin_info.renderArea.offset.y = 0;
// TODO(Triang3l): Actual dirty width / height in the deferred command
// buffer.
render_pass_begin_info.renderArea.extent = framebuffer->host_extent;
render_pass_begin_info.clearValueCount = 0;
render_pass_begin_info.pClearValues = nullptr;
deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
VK_SUBPASS_CONTENTS_INLINE);
}
// After all commands that may dispatch, copy or insert barriers, submit the
// barriers (may end the render pass), and (re)enter the render pass before
// drawing.
SubmitBarriersAndEnterRenderTargetCacheRenderPass(
render_target_cache_->last_update_render_pass(),
render_target_cache_->last_update_framebuffer());
// Draw.
if (primitive_processing_result.index_buffer_type ==
@ -1589,7 +1749,7 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
dynamic_stencil_reference_front_update_needed_ = true;
dynamic_stencil_reference_back_update_needed_ = true;
current_render_pass_ = VK_NULL_HANDLE;
current_framebuffer_ = VK_NULL_HANDLE;
current_framebuffer_ = nullptr;
current_guest_graphics_pipeline_ = VK_NULL_HANDLE;
current_external_graphics_pipeline_ = VK_NULL_HANDLE;
current_guest_graphics_pipeline_layout_ = nullptr;
@ -1759,6 +1919,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
sparse_memory_binds_.clear();
}
SubmitBarriers(true);
assert_false(command_buffers_writable_.empty());
CommandBuffer command_buffer = command_buffers_writable_.back();
if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
@ -1884,6 +2046,28 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
return true;
}
// Finalizes the pipeline barrier batch currently being built: if it contains
// anything (stage masks or barrier structures recorded past its offsets), it
// is appended to pending_barriers_, and current_pending_barrier_ is reset to
// start a new batch at the current ends of the shared barrier vectors.
// Called when an overlapping-but-different barrier needs a separate
// vkCmdPipelineBarrier, and by SubmitBarriers before recording.
void VulkanCommandProcessor::SplitPendingBarrier() {
size_t pending_buffer_memory_barrier_count =
pending_barriers_buffer_memory_barriers_.size();
size_t pending_image_memory_barrier_count =
pending_barriers_image_memory_barriers_.size();
// Empty batch (no stages accumulated, offsets already at the vector ends) -
// nothing to split off.
if (!current_pending_barrier_.src_stage_mask &&
!current_pending_barrier_.dst_stage_mask &&
current_pending_barrier_.buffer_memory_barriers_offset >=
pending_buffer_memory_barrier_count &&
current_pending_barrier_.image_memory_barriers_offset >=
pending_image_memory_barrier_count) {
return;
}
pending_barriers_.emplace_back(current_pending_barrier_);
// The next batch owns whatever gets pushed after the current vector ends.
current_pending_barrier_.src_stage_mask = 0;
current_pending_barrier_.dst_stage_mask = 0;
current_pending_barrier_.buffer_memory_barriers_offset =
pending_buffer_memory_barrier_count;
current_pending_barrier_.image_memory_barriers_offset =
pending_image_memory_barrier_count;
}
VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
const {
VkShaderStageFlags stages = VK_SHADER_STAGE_VERTEX_BIT;

View File

@ -81,6 +81,31 @@ class VulkanCommandProcessor : public CommandProcessor {
uint64_t GetCurrentFrame() const { return frame_current_; }
uint64_t GetCompletedFrame() const { return frame_completed_; }
// Submission must be open to insert barriers.
void PushBufferMemoryBarrier(
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
bool skip_if_equal = true);
void PushImageMemoryBarrier(
VkImage image, const VkImageSubresourceRange& subresource_range,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
VkImageLayout old_layout, VkImageLayout new_layout,
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
bool skip_if_equal = true);
// Returns whether any barriers have been submitted - if true is returned, the
// render pass will also be closed.
bool SubmitBarriers(bool force_end_render_pass);
// If not started yet, begins a render pass from the render target cache.
// Submission must be open.
void SubmitBarriersAndEnterRenderTargetCacheRenderPass(
VkRenderPass render_pass,
const VulkanRenderTargetCache::Framebuffer* framebuffer);
// Must be called before doing anything outside the render pass scope,
// including adding pipeline barriers that are not a part of the render pass
// scope. Submission must be open.
@ -205,6 +230,8 @@ class VulkanCommandProcessor : public CommandProcessor {
return !submission_open_ && submissions_in_flight_fences_.empty();
}
void SplitPendingBarrier();
VkShaderStageFlags GetGuestVertexShaderStageFlags() const;
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
@ -314,6 +341,18 @@ class VulkanCommandProcessor : public CommandProcessor {
swap_framebuffers_;
std::deque<std::pair<uint64_t, VkFramebuffer>> swap_framebuffers_outdated_;
// Pending pipeline barriers.
std::vector<VkBufferMemoryBarrier> pending_barriers_buffer_memory_barriers_;
std::vector<VkImageMemoryBarrier> pending_barriers_image_memory_barriers_;
struct PendingBarrier {
VkPipelineStageFlags src_stage_mask = 0;
VkPipelineStageFlags dst_stage_mask = 0;
size_t buffer_memory_barriers_offset = 0;
size_t image_memory_barriers_offset = 0;
};
std::vector<PendingBarrier> pending_barriers_;
PendingBarrier current_pending_barrier_;
// The current dynamic state of the graphics pipeline bind point. Note that
// binding any pipeline to the bind point with static state (even if it's
// unused, like depth bias being disabled, but the values themselves still not
@ -348,7 +387,7 @@ class VulkanCommandProcessor : public CommandProcessor {
// Cache render pass currently started in the command buffer with the
// framebuffer.
VkRenderPass current_render_pass_;
VkFramebuffer current_framebuffer_;
const VulkanRenderTargetCache::Framebuffer* current_framebuffer_;
// Currently bound graphics pipeline, either from the pipeline cache (with
// potentially deferred creation - current_external_graphics_pipeline_ is

View File

@ -95,6 +95,8 @@ void VulkanPrimitiveProcessor::BeginSubmission() {
// been used yet, and builtin_index_buffer_upload_ is written before
// submitting commands reading it.
command_processor_.EndRenderPass();
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
@ -104,25 +106,10 @@ void VulkanPrimitiveProcessor::BeginSubmission() {
copy_region->dstOffset = 0;
copy_region->size = builtin_index_buffer_size_;
VkBufferMemoryBarrier builtin_index_buffer_memory_barrier;
builtin_index_buffer_memory_barrier.sType =
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
builtin_index_buffer_memory_barrier.pNext = nullptr;
builtin_index_buffer_memory_barrier.srcAccessMask =
VK_ACCESS_TRANSFER_WRITE_BIT;
builtin_index_buffer_memory_barrier.dstAccessMask =
VK_ACCESS_INDEX_READ_BIT;
builtin_index_buffer_memory_barrier.srcQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
builtin_index_buffer_memory_barrier.dstQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED;
builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_;
builtin_index_buffer_memory_barrier.offset = 0;
builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE;
command_processor_.EndRenderPass();
command_buffer.CmdVkPipelineBarrier(
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0,
0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr);
command_processor_.PushBufferMemoryBarrier(
builtin_index_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_INDEX_READ_BIT);
builtin_index_buffer_upload_submission_ =
command_processor_.GetCurrentSubmission();

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -183,73 +183,26 @@ bool VulkanRenderTargetCache::Update(bool is_rasterization_done,
last_update_framebuffer_ = framebuffer;
// Transition the used render targets.
VkPipelineStageFlags barrier_src_stage_mask = 0;
VkPipelineStageFlags barrier_dst_stage_mask = 0;
VkImageMemoryBarrier barrier_image_memory[1 + xenos::kMaxColorRenderTargets];
uint32_t barrier_image_memory_count = 0;
for (uint32_t i = 0; i < 1 + xenos::kMaxColorRenderTargets; ++i) {
RenderTarget* rt = depth_and_color_render_targets[i];
if (!rt) {
continue;
}
auto& vulkan_rt = *static_cast<VulkanRenderTarget*>(rt);
VkPipelineStageFlags rt_src_stage_mask = vulkan_rt.current_stage_mask();
VkAccessFlags rt_src_access_mask = vulkan_rt.current_access_mask();
VkImageLayout rt_old_layout = vulkan_rt.current_layout();
VkPipelineStageFlags rt_dst_stage_mask;
VkAccessFlags rt_dst_access_mask;
VkImageLayout rt_new_layout;
if (i) {
rt_dst_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
rt_dst_access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
rt_new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else {
rt_dst_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
rt_dst_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
rt_new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
bool rt_image_memory_barrier_needed =
rt_src_access_mask != rt_dst_access_mask ||
rt_old_layout != rt_new_layout;
if (rt_image_memory_barrier_needed ||
rt_src_stage_mask != rt_dst_stage_mask) {
barrier_src_stage_mask |= rt_src_stage_mask;
barrier_dst_stage_mask |= rt_dst_stage_mask;
if (rt_image_memory_barrier_needed) {
VkImageMemoryBarrier& rt_image_memory_barrier =
barrier_image_memory[barrier_image_memory_count++];
rt_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
rt_image_memory_barrier.pNext = nullptr;
rt_image_memory_barrier.srcAccessMask = rt_src_access_mask;
rt_image_memory_barrier.dstAccessMask = rt_dst_access_mask;
rt_image_memory_barrier.oldLayout = rt_old_layout;
rt_image_memory_barrier.newLayout = rt_new_layout;
rt_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
rt_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
rt_image_memory_barrier.image = vulkan_rt.image();
VulkanRenderTarget::GetDrawUsage(i == 0, &rt_dst_stage_mask,
&rt_dst_access_mask, &rt_new_layout);
command_processor_.PushImageMemoryBarrier(
vulkan_rt.image(),
ui::vulkan::util::InitializeSubresourceRange(
rt_image_memory_barrier.subresourceRange,
i ? VK_IMAGE_ASPECT_COLOR_BIT
: (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
}
vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout);
}
}
if (barrier_src_stage_mask || barrier_dst_stage_mask ||
barrier_image_memory_count) {
if (!barrier_src_stage_mask) {
barrier_src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
if (!barrier_dst_stage_mask) {
barrier_dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
command_processor_.EndRenderPass();
command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
barrier_src_stage_mask, barrier_dst_stage_mask, 0, 0, nullptr, 0,
nullptr, barrier_image_memory_count, barrier_image_memory);
: (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)),
vulkan_rt.current_stage_mask(), rt_dst_stage_mask,
vulkan_rt.current_access_mask(), rt_dst_access_mask,
vulkan_rt.current_layout(), rt_new_layout);
vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout);
}
return true;
@ -288,8 +241,8 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.initialLayout = VulkanRenderTarget::kDepthDrawLayout;
attachment.finalLayout = VulkanRenderTarget::kDepthDrawLayout;
}
VkAttachmentReference color_attachments[xenos::kMaxColorRenderTargets];
xenos::ColorRenderTargetFormat color_formats[] = {
@ -300,7 +253,7 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
};
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
VkAttachmentReference& color_attachment = color_attachments[i];
color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_attachment.layout = VulkanRenderTarget::kColorDrawLayout;
uint32_t attachment_bit = uint32_t(1) << (1 + i);
if (!(key.depth_and_color_used & attachment_bit)) {
color_attachment.attachment = VK_ATTACHMENT_UNUSED;
@ -317,15 +270,14 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachment.initialLayout = VulkanRenderTarget::kColorDrawLayout;
attachment.finalLayout = VulkanRenderTarget::kColorDrawLayout;
}
VkAttachmentReference depth_stencil_attachment;
depth_stencil_attachment.attachment =
(key.depth_and_color_used & 0b1) ? 0 : VK_ATTACHMENT_UNUSED;
depth_stencil_attachment.layout =
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depth_stencil_attachment.layout = VulkanRenderTarget::kDepthDrawLayout;
VkSubpassDescription subpass;
subpass.flags = 0;
@ -344,15 +296,12 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
VkPipelineStageFlags dependency_stage_mask = 0;
VkAccessFlags dependency_access_mask = 0;
if (key.depth_and_color_used & 0b1) {
dependency_stage_mask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependency_access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dependency_stage_mask |= VulkanRenderTarget::kDepthDrawStageMask;
dependency_access_mask |= VulkanRenderTarget::kDepthDrawAccessMask;
}
if (key.depth_and_color_used >> 1) {
dependency_stage_mask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency_access_mask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependency_stage_mask |= VulkanRenderTarget::kColorDrawStageMask;
dependency_access_mask |= VulkanRenderTarget::kColorDrawAccessMask;
}
VkSubpassDependency subpass_dependencies[2];
subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
@ -577,9 +526,9 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget(
view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
ui::vulkan::util::InitializeSubresourceRange(
view_create_info.subresourceRange,
key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT);
view_create_info.subresourceRange =
ui::vulkan::util::InitializeSubresourceRange(
key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT);
VkImageView view_depth_color;
if (dfn.vkCreateImageView(device, &view_create_info, nullptr,
&view_depth_color) != VK_SUCCESS) {

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -113,6 +113,22 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
// Can only be destroyed when framebuffers referencing it are destroyed!
class VulkanRenderTarget final : public RenderTarget {
public:
static constexpr VkPipelineStageFlags kColorDrawStageMask =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
static constexpr VkAccessFlags kColorDrawAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
static constexpr VkImageLayout kColorDrawLayout =
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
static constexpr VkPipelineStageFlags kDepthDrawStageMask =
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
static constexpr VkAccessFlags kDepthDrawAccessMask =
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkImageLayout kDepthDrawLayout =
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
// Takes ownership of the Vulkan objects passed to the constructor.
VulkanRenderTarget(RenderTargetKey key,
const ui::vulkan::VulkanProvider& provider,
@ -137,6 +153,26 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
VkImageView view_depth_color() const { return view_depth_color_; }
VkImageView view_depth_stencil() const { return view_depth_stencil_; }
// Returns the stage mask, access mask and image layout in which a render
// target of the requested kind (depth/stencil if is_depth, color
// otherwise) is used while being drawn to. Any of the output pointers may
// be null if the caller doesn't need that value.
static void GetDrawUsage(bool is_depth,
                         VkPipelineStageFlags* stage_mask_out,
                         VkAccessFlags* access_mask_out,
                         VkImageLayout* layout_out) {
  if (is_depth) {
    if (stage_mask_out) {
      *stage_mask_out = kDepthDrawStageMask;
    }
    if (access_mask_out) {
      *access_mask_out = kDepthDrawAccessMask;
    }
    if (layout_out) {
      *layout_out = kDepthDrawLayout;
    }
  } else {
    if (stage_mask_out) {
      *stage_mask_out = kColorDrawStageMask;
    }
    if (access_mask_out) {
      *access_mask_out = kColorDrawAccessMask;
    }
    if (layout_out) {
      *layout_out = kColorDrawLayout;
    }
  }
}
// Convenience overload: returns the draw-time usage for this render
// target based on whether its key marks it as depth/stencil. Output
// pointers may be null.
void GetDrawUsage(VkPipelineStageFlags* stage_mask_out,
                  VkAccessFlags* access_mask_out,
                  VkImageLayout* layout_out) const {
  GetDrawUsage(key().is_depth, stage_mask_out, access_mask_out, layout_out);
}
// Pipeline stages in which the image has last been used, for constructing
// the source scope of the next barrier involving this render target.
VkPipelineStageFlags current_stage_mask() const {
  return current_stage_mask_;
}

View File

@ -224,31 +224,28 @@ void VulkanSharedMemory::Use(Usage usage,
std::min(written_range.second, kBufferSize - written_range.first);
assert_true(usage != Usage::kRead || !written_range.second);
if (last_usage_ != usage || last_written_range_.second) {
VkPipelineStageFlags stage_mask_src, stage_mask_dst;
VkBufferMemoryBarrier buffer_memory_barrier;
GetBarrier(last_usage_, stage_mask_src,
buffer_memory_barrier.srcAccessMask);
GetBarrier(usage, stage_mask_dst, buffer_memory_barrier.dstAccessMask);
buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_memory_barrier.pNext = nullptr;
buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.buffer = buffer_;
VkPipelineStageFlags src_stage_mask, dst_stage_mask;
VkAccessFlags src_access_mask, dst_access_mask;
GetBarrier(last_usage_, src_stage_mask, src_access_mask);
GetBarrier(usage, dst_stage_mask, dst_access_mask);
VkDeviceSize offset, size;
if (last_usage_ == usage) {
// Committing the previous write.
buffer_memory_barrier.offset = VkDeviceSize(last_written_range_.first);
buffer_memory_barrier.size = VkDeviceSize(last_written_range_.second);
// Committing the previous write, while not changing the access mask
// (passing false as whether to skip the barrier if no masks are changed
// for this reason).
offset = VkDeviceSize(last_written_range_.first);
size = VkDeviceSize(last_written_range_.second);
} else {
// Changing the stage and access mask - all preceding writes must be
// available not only to the source stage, but to the destination as well.
buffer_memory_barrier.offset = 0;
buffer_memory_barrier.size = VK_WHOLE_SIZE;
offset = 0;
size = VK_WHOLE_SIZE;
last_usage_ = usage;
}
command_processor_.EndRenderPass();
command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1,
&buffer_memory_barrier, 0, nullptr);
command_processor_.PushBufferMemoryBarrier(
buffer_, offset, size, src_stage_mask, dst_stage_mask, src_access_mask,
dst_access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
false);
}
last_written_range_ = written_range;
}
@ -276,8 +273,8 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
return false;
}
command_processor_.EndRenderPass();
Use(Usage::kRead);
command_processor_.SubmitBarriers(true);
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
@ -295,19 +292,10 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
download_buffer_offset += download_range.second;
}
VkBufferMemoryBarrier download_buffer_barrier;
download_buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
download_buffer_barrier.pNext = nullptr;
download_buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
download_buffer_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
download_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
download_buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
download_buffer_barrier.buffer = trace_download_buffer_;
download_buffer_barrier.offset = 0;
download_buffer_barrier.size = VK_WHOLE_SIZE;
command_buffer.CmdVkPipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr,
1, &download_buffer_barrier, 0, nullptr);
command_processor_.PushBufferMemoryBarrier(
trace_download_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_HOST_READ_BIT);
return true;
}
@ -389,7 +377,6 @@ bool VulkanSharedMemory::UploadRanges(
if (upload_page_ranges.empty()) {
return true;
}
command_processor_.EndRenderPass();
// upload_page_ranges are sorted, use them to determine the range for the
// ordering barrier.
Use(Usage::kTransferDestination,
@ -398,6 +385,7 @@ bool VulkanSharedMemory::UploadRanges(
(upload_page_ranges.back().first + upload_page_ranges.back().second -
upload_page_ranges.front().first)
<< page_size_log2()));
command_processor_.SubmitBarriers(true);
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
uint64_t submission_current = command_processor_.GetCurrentSubmission();

View File

@ -426,7 +426,8 @@ void VulkanImmediateDrawer::End() {
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
util::InitializeSubresourceRange(image_memory_barrier.subresourceRange);
image_memory_barrier.subresourceRange =
util::InitializeSubresourceRange();
for (const PendingTextureUpload& pending_texture_upload :
texture_uploads_pending_) {
image_memory_barriers.emplace_back(image_memory_barrier).image =
@ -913,7 +914,7 @@ bool VulkanImmediateDrawer::CreateTextureResource(
image_view_create_info.components.g = swizzle;
image_view_create_info.components.b = swizzle;
image_view_create_info.components.a = swizzle;
util::InitializeSubresourceRange(image_view_create_info.subresourceRange);
image_view_create_info.subresourceRange = util::InitializeSubresourceRange();
VkImageView image_view;
if (dfn.vkCreateImageView(device, &image_view_create_info, nullptr,
&image_view) != VK_SUCCESS) {

View File

@ -313,7 +313,7 @@ bool VulkanPresenter::CaptureGuestOutput(RawImage& image_out) {
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = guest_output_image->image();
util::InitializeSubresourceRange(image_memory_barrier.subresourceRange);
image_memory_barrier.subresourceRange = util::InitializeSubresourceRange();
dfn.vkCmdPipelineBarrier(command_buffer, kGuestOutputInternalStageMask,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &image_memory_barrier);

View File

@ -116,17 +116,18 @@ inline VkExtent2D GetMax2DFramebufferExtent(const VulkanProvider& provider) {
return max_extent;
}
inline void InitializeSubresourceRange(
VkImageSubresourceRange& range,
// Builds a VkImageSubresourceRange, by default covering every mip level
// and array layer of the color aspect. Returned by value so it can be
// assigned directly into barrier / view create info structures.
inline VkImageSubresourceRange InitializeSubresourceRange(
    VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT,
    uint32_t base_mip_level = 0, uint32_t level_count = VK_REMAINING_MIP_LEVELS,
    uint32_t base_array_layer = 0,
    uint32_t layer_count = VK_REMAINING_ARRAY_LAYERS) {
  // Aggregate initialization in VkImageSubresourceRange member order:
  // aspectMask, baseMipLevel, levelCount, baseArrayLayer, layerCount.
  VkImageSubresourceRange subresource_range = {
      aspect_mask, base_mip_level, level_count, base_array_layer, layer_count};
  return subresource_range;
}
// Creates a buffer backed by a dedicated allocation. The allocation size will