diff --git a/src/xenia/gpu/shared_memory.h b/src/xenia/gpu/shared_memory.h index 6dae85909..43221426a 100644 --- a/src/xenia/gpu/shared_memory.h +++ b/src/xenia/gpu/shared_memory.h @@ -123,7 +123,9 @@ class SharedMemory { // successfully uploaded range as early as possible, before the memcpy, to // make sure invalidation that happened during the CPU -> GPU memcpy isn't // missed (upload_page_ranges is in pages because of this - MarkRangeValid has - // page granularity). + // page granularity). upload_page_ranges are sorted in ascending address + // order, so front and back can be used to determine the overall bounds of + // pages to be uploaded. virtual bool UploadRanges( const std::vector>& upload_page_ranges) = 0; diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc new file mode 100644 index 000000000..f9c359506 --- /dev/null +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -0,0 +1,208 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/deferred_command_buffer.h" + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +DeferredCommandBuffer::DeferredCommandBuffer( + const VulkanCommandProcessor& command_processor, size_t initial_size) + : command_processor_(command_processor) { + command_stream_.reserve(initial_size / sizeof(uintmax_t)); +} + +void DeferredCommandBuffer::Reset() { command_stream_.clear(); } + +void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = + command_processor_.GetVulkanContext().GetVulkanProvider().dfn(); + const uintmax_t* stream = command_stream_.data(); + size_t stream_remaining = command_stream_.size(); + while (stream_remaining) { + const CommandHeader& header = + *reinterpret_cast(stream); + stream += kCommandHeaderSizeElements; + stream_remaining -= kCommandHeaderSizeElements; + + switch (header.command) { + case Command::kVkBindIndexBuffer: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset, + args.index_type); + } break; + + case Command::kVkCopyBuffer: { + auto& args = *reinterpret_cast(stream); + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + dfn.vkCmdCopyBuffer( + command_buffer, args.src_buffer, args.dst_buffer, args.region_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); + } break; + + case Command::kVkPipelineBarrier: { + auto& args = *reinterpret_cast(stream); + size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier); + + const VkMemoryBarrier* memory_barriers; + if (args.memory_barrier_count) { + static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier)); + 
memory_barriers = reinterpret_cast( + reinterpret_cast(stream) + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkMemoryBarrier) * args.memory_barrier_count; + } else { + memory_barriers = nullptr; + } + + const VkBufferMemoryBarrier* buffer_memory_barriers; + if (args.buffer_memory_barrier_count) { + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers = + reinterpret_cast( + reinterpret_cast(stream) + + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count; + } else { + buffer_memory_barriers = nullptr; + } + + const VkImageMemoryBarrier* image_memory_barriers; + if (args.image_memory_barrier_count) { + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier)); + image_memory_barriers = reinterpret_cast( + reinterpret_cast(stream) + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count; + } else { + image_memory_barriers = nullptr; + } + + dfn.vkCmdPipelineBarrier( + command_buffer, args.src_stage_mask, args.dst_stage_mask, + args.dependency_flags, args.memory_barrier_count, memory_barriers, + args.buffer_memory_barrier_count, buffer_memory_barriers, + args.image_memory_barrier_count, image_memory_barriers); + } break; + + default: + assert_unhandled_case(header.command); + break; + } + + stream += header.arguments_size_elements; + stream_remaining -= header.arguments_size_elements; + } +} + +void DeferredCommandBuffer::CmdVkPipelineBarrier( + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers) { + size_t arguments_size = sizeof(ArgsVkPipelineBarrier); + + size_t memory_barriers_offset; + if (memory_barrier_count) { + static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier)); + memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count; + } else { + memory_barriers_offset = 0; + } + + size_t buffer_memory_barriers_offset; + if (buffer_memory_barrier_count) { + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers_offset = arguments_size; + arguments_size += + sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count; + } else { + buffer_memory_barriers_offset = 0; + } + + size_t image_memory_barriers_offset; + if (image_memory_barrier_count) { + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier)); + image_memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count; + } else { + image_memory_barriers_offset = 0; + } + + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkPipelineBarrier, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.src_stage_mask = src_stage_mask; + args.dst_stage_mask = 
dst_stage_mask;
+  args.dependency_flags = dependency_flags;
+  args.memory_barrier_count = memory_barrier_count;
+  args.buffer_memory_barrier_count = buffer_memory_barrier_count;
+  args.image_memory_barrier_count = image_memory_barrier_count;
+  if (memory_barrier_count) {
+    std::memcpy(args_ptr + memory_barriers_offset, memory_barriers,
+                sizeof(VkMemoryBarrier) * memory_barrier_count);
+  }
+  if (buffer_memory_barrier_count) {
+    std::memcpy(args_ptr + buffer_memory_barriers_offset,
+                buffer_memory_barriers,
+                sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count);
+  }
+  if (image_memory_barrier_count) {
+    std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers,
+                sizeof(VkImageMemoryBarrier) * image_memory_barrier_count);
+  }
+}
+
+void* DeferredCommandBuffer::WriteCommand(Command command,
+                                          size_t arguments_size_bytes) {
+  size_t arguments_size_elements =
+      (arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
+  size_t offset = command_stream_.size();
+  command_stream_.resize(offset + kCommandHeaderSizeElements +
+                         arguments_size_elements);
+  CommandHeader& header =
+      *reinterpret_cast<CommandHeader*>(command_stream_.data() + offset);
+  header.command = command;
+  header.arguments_size_elements = uint32_t(arguments_size_elements);
+  return command_stream_.data() + (offset + kCommandHeaderSizeElements);
+}
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h
new file mode 100644
index 000000000..476abe605
--- /dev/null
+++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h
@@ -0,0 +1,120 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2020 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details.
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ +#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ + +#include +#include +#include + +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class DeferredCommandBuffer { + public: + DeferredCommandBuffer(const VulkanCommandProcessor& command_processor, + size_t initial_size_bytes = 1024 * 1024); + + void Reset(); + void Execute(VkCommandBuffer command_buffer); + + void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer))); + args.buffer = buffer; + args.offset = offset; + args.index_type = index_type; + } + + void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count, const VkBufferCopy* regions) { + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + const size_t header_size = + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkCopyBuffer, + header_size + sizeof(VkBufferCopy) * region_count)); + auto& args = *reinterpret_cast(args_ptr); + args.src_buffer = src_buffer; + args.dst_buffer = dst_buffer; + args.region_count = region_count; + std::memcpy(args_ptr + header_size, regions, + sizeof(VkBufferCopy) * region_count); + } + + // pNext of all barriers must be null. + void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers); + + private: + enum class Command { + kVkBindIndexBuffer, + kVkCopyBuffer, + kVkPipelineBarrier, + }; + + struct CommandHeader { + Command command; + uint32_t arguments_size_elements; + }; + static constexpr size_t kCommandHeaderSizeElements = + (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + + struct ArgsVkBindIndexBuffer { + VkBuffer buffer; + VkDeviceSize offset; + VkIndexType index_type; + }; + + struct ArgsVkCopyBuffer { + VkBuffer src_buffer; + VkBuffer dst_buffer; + uint32_t region_count; + // Followed by VkBufferCopy[]. + }; + + struct ArgsVkPipelineBarrier { + VkPipelineStageFlags src_stage_mask; + VkPipelineStageFlags dst_stage_mask; + VkDependencyFlags dependency_flags; + uint32_t memory_barrier_count; + uint32_t buffer_memory_barrier_count; + uint32_t image_memory_barrier_count; + // Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[], + // VkImageMemoryBarrier[]. + }; + + void* WriteCommand(Command command, size_t arguments_size_bytes); + + const VulkanCommandProcessor& command_processor_; + + // uintmax_t to ensure uint64_t and pointer alignment of all structures. 
+ std::vector command_stream_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index f05838d29..c688ca6ee 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -15,6 +15,8 @@ #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -25,7 +27,9 @@ namespace vulkan { VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) - : CommandProcessor(graphics_system, kernel_state) {} + : CommandProcessor(graphics_system, kernel_state), + deferred_command_buffer_(*this) {} + VulkanCommandProcessor::~VulkanCommandProcessor() = default; void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, @@ -39,10 +43,12 @@ bool VulkanCommandProcessor::SetupContext() { return false; } - const ui::vulkan::VulkanProvider& provider = - GetVulkanContext().GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); + shared_memory_ = + std::make_unique(*this, *memory_, trace_writer_); + if (!shared_memory_->Initialize()) { + XELOGE("Failed to initialize shared memory"); + return false; + } return true; } @@ -50,11 +56,14 @@ bool VulkanCommandProcessor::SetupContext() { void VulkanCommandProcessor::ShutdownContext() { AwaitAllQueueOperationsCompletion(); + shared_memory_.reset(); + const ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + deferred_command_buffer_.Reset(); for (const auto& command_buffer_pair : command_buffers_submitted_) { dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr); } @@ -119,19 +128,46 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES BeginSubmission(true); + bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; + + // Actually draw. + if (indexed) { + uint32_t index_size = + index_buffer_info->format == xenos::IndexFormat::kInt32 + ? sizeof(uint32_t) + : sizeof(uint16_t); + assert_false(index_buffer_info->guest_base & (index_size - 1)); + uint32_t index_base = + index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1); + uint32_t index_buffer_size = index_buffer_info->count * index_size; + if (!shared_memory_->RequestRange(index_base, index_buffer_size)) { + XELOGE( + "Failed to request index buffer at 0x{:08X} (size {}) in the shared " + "memory", + index_base, index_buffer_size); + return false; + } + deferred_command_buffer_.CmdVkBindIndexBuffer( + shared_memory_->buffer(), index_base, + index_buffer_info->format == xenos::IndexFormat::kInt32 + ? 
VK_INDEX_TYPE_UINT32 + : VK_INDEX_TYPE_UINT16); + } + shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + return true; } bool VulkanCommandProcessor::IssueCopy() { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES BeginSubmission(true); @@ -217,12 +253,14 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_buffers_writable_.push_back(command_buffer_pair.first); command_buffers_submitted_.pop_front(); } + + shared_memory_->CompletedSubmissionUpdated(); } void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES bool is_opening_frame = is_guest_command && !frame_open_; if (submission_open_ && !is_opening_frame) { @@ -257,6 +295,11 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { if (!submission_open_) { submission_open_ = true; + + // Start a new deferred command buffer - will submit it to the real one in + // the end of the submission (when async pipeline state object creation + // requests are fulfilled). + deferred_command_buffer_.Reset(); } if (is_opening_frame) { @@ -321,6 +364,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; if (submission_open_) { + shared_memory_->EndSubmission(); + assert_false(command_buffers_writable_.empty()); CommandBuffer command_buffer = command_buffers_writable_.back(); if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) { @@ -339,6 +384,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { XELOGE("Failed to begin a Vulkan command buffer"); return false; } + deferred_command_buffer_.Execute(command_buffer.buffer); // TODO(Triang3l): Write deferred command buffer commands. if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) { XELOGE("Failed to end a Vulkan command buffer"); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 90409159d..016f9f7d0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -16,7 +16,9 @@ #include #include "xenia/gpu/command_processor.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/vulkan/vulkan_context.h" @@ -39,6 +41,13 @@ class VulkanCommandProcessor : public CommandProcessor { return static_cast(*context_); } + // Returns the deferred drawing command list for the currently open + // submission. 
+ DeferredCommandBuffer& deferred_command_buffer() { + assert_true(submission_open_); + return deferred_command_buffer_; + } + uint64_t GetCurrentSubmission() const { return submission_completed_ + uint64_t(submissions_in_flight_fences_.size()) + 1; @@ -113,6 +122,9 @@ class VulkanCommandProcessor : public CommandProcessor { }; std::vector command_buffers_writable_; std::deque> command_buffers_submitted_; + DeferredCommandBuffer deferred_command_buffer_; + + std::unique_ptr shared_memory_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc new file mode 100644 index 000000000..ce8818cd4 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -0,0 +1,309 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" + +#include +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanSharedMemory::VulkanSharedMemory( + VulkanCommandProcessor& command_processor, Memory& memory, + TraceWriter& trace_writer) + : SharedMemory(memory), + command_processor_(command_processor), + trace_writer_(trace_writer) {} + +VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); } + +bool VulkanSharedMemory::Initialize() { + InitializeCommon(); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = 0; + const VkBufferCreateFlags sparse_flags = + VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + // TODO(Triang3l): Sparse binding. 
+ buffer_create_info.size = kBufferSize; + buffer_create_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.queueFamilyIndexCount = 0; + buffer_create_info.pQueueFamilyIndices = nullptr; + VkResult buffer_create_result = + dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); + if (buffer_create_result != VK_SUCCESS) { + if (buffer_create_info.flags & sparse_flags) { + buffer_create_info.flags &= ~sparse_flags; + buffer_create_result = + dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); + } + if (buffer_create_result != VK_SUCCESS) { + XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer", + kBufferSize >> 20); + Shutdown(); + return false; + } + } + VkMemoryRequirements buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer_, + &buffer_memory_requirements); + // TODO(Triang3l): Determine sparse binding properties from memory + // requirements. + if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &buffer_memory_type_)) { + XELOGE( + "Shared memory: Failed to get a device-local Vulkan memory type for " + "the buffer"); + Shutdown(); + return false; + } + if (!(buffer_create_info.flags & sparse_flags)) { + VkMemoryAllocateInfo buffer_memory_allocate_info; + buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + buffer_memory_allocate_info.pNext = nullptr; + buffer_memory_allocate_info.allocationSize = + buffer_memory_requirements.size; + buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_; + VkDeviceMemory buffer_memory; + if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr, + &buffer_memory) != VK_SUCCESS) { + XELOGE( + "Shared memory: Failed to allocate {} MB of memory for the Vulkan " + "buffer", + kBufferSize >> 20); + Shutdown(); + return false; + } + buffer_memory_.push_back(buffer_memory); + if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) != + VK_SUCCESS) { + XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer"); + Shutdown(); + return false; + } + } + + upload_buffer_pool_ = std::make_unique( + provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize, + size_t(1) << page_size_log2())); + + return true; +} + +void VulkanSharedMemory::Shutdown(bool from_destructor) { + upload_buffer_pool_.reset(); + + last_written_range_ = std::make_pair(0, 0); + last_usage_ = Usage::kTransferDestination; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_); + + buffer_memory_allocated_.clear(); + for (VkDeviceMemory memory : buffer_memory_) { + dfn.vkFreeMemory(device, memory, nullptr); + } + buffer_memory_.clear(); + + // If calling from the destructor, the SharedMemory destructor will call + // ShutdownCommon. 
+  if (!from_destructor) {
+    ShutdownCommon();
+  }
+}
+
+void VulkanSharedMemory::CompletedSubmissionUpdated() {
+  upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission());
+}
+
+void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); }
+
+void VulkanSharedMemory::Use(Usage usage,
+                             std::pair<uint32_t, uint32_t> written_range) {
+  written_range.first = std::min(written_range.first, kBufferSize);
+  written_range.second =
+      std::min(written_range.second, kBufferSize - written_range.first);
+  assert_true(usage != Usage::kRead || !written_range.second);
+  if (last_usage_ != usage || last_written_range_.second) {
+    VkPipelineStageFlags stage_mask_src, stage_mask_dst;
+    VkBufferMemoryBarrier buffer_memory_barrier;
+    GetBarrier(last_usage_, stage_mask_src,
+               buffer_memory_barrier.srcAccessMask);
+    GetBarrier(usage, stage_mask_dst, buffer_memory_barrier.dstAccessMask);
+    buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    buffer_memory_barrier.pNext = nullptr;
+    buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    buffer_memory_barrier.buffer = buffer_;
+    if (last_usage_ == usage) {
+      // Committing the previous write.
+      buffer_memory_barrier.offset = VkDeviceSize(last_written_range_.first);
+      buffer_memory_barrier.size = VkDeviceSize(last_written_range_.second);
+    } else {
+      // Changing the stage and access mask - all preceding writes must be
+      // available not only to the source stage, but to the destination as
+      // well.
+      buffer_memory_barrier.offset = 0;
+      buffer_memory_barrier.size = VK_WHOLE_SIZE;
+      last_usage_ = usage;
+    }
+    command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
+        stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1,
+        &buffer_memory_barrier, 0, nullptr);
+  }
+  last_written_range_ = written_range;
+}
+
+bool VulkanSharedMemory::EnsureHostGpuMemoryAllocated(uint32_t start,
+                                                      uint32_t length) {
+  // TODO(Triang3l): Do sparse binding.
+  return true;
+}
+
+bool VulkanSharedMemory::UploadRanges(
+    const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
+  if (upload_page_ranges.empty()) {
+    return true;
+  }
+  // upload_page_ranges is sorted by address, so the first and last ranges
+  // bound the region that the ordering barrier must cover.
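+  // For example, with 4 KiB pages (page_size_log2() == 12) and sorted ranges
+  // {first = 16, count = 4} and {first = 40, count = 2}, the barrier below
+  // covers pages [16, 42): offset 16 << 12 = 0x10000 and length
+  // (40 + 2 - 16) << 12 = 0x1A000 bytes.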
+ Use(Usage::kTransferDestination, + std::make_pair( + upload_page_ranges.front().first << page_size_log2(), + (upload_page_ranges.back().first + upload_page_ranges.back().second - + upload_page_ranges.front().first) + << page_size_log2())); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + uint64_t submission_current = command_processor_.GetCurrentSubmission(); + bool successful = true; + upload_regions_.clear(); + VkBuffer upload_buffer_previous = VK_NULL_HANDLE; + for (auto upload_range : upload_page_ranges) { + uint32_t upload_range_start = upload_range.first; + uint32_t upload_range_length = upload_range.second; + trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(), + upload_range_length << page_size_log2()); + while (upload_range_length) { + VkBuffer upload_buffer; + VkDeviceSize upload_buffer_offset, upload_buffer_size; + uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( + submission_current, upload_range_length << page_size_log2(), + size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset, + upload_buffer_size); + if (upload_buffer_mapping == nullptr) { + XELOGE("Shared memory: Failed to get a Vulkan upload buffer"); + successful = false; + break; + } + MakeRangeValid(upload_range_start << page_size_log2(), + uint32_t(upload_buffer_size), false); + std::memcpy( + upload_buffer_mapping, + memory().TranslatePhysical(upload_range_start << page_size_log2()), + upload_buffer_size); + if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) { + assert_true(upload_buffer_previous != VK_NULL_HANDLE); + command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_, + uint32_t(upload_regions_.size()), + upload_regions_.data()); + upload_regions_.clear(); + } + upload_buffer_previous = upload_buffer; + VkBufferCopy& upload_region = upload_regions_.emplace_back(); + upload_region.srcOffset = upload_buffer_offset; + upload_region.dstOffset = + VkDeviceSize(upload_range_start << page_size_log2()); + upload_region.size = upload_buffer_size; + uint32_t upload_buffer_pages = + uint32_t(upload_buffer_size >> page_size_log2()); + upload_range_start += upload_buffer_pages; + upload_range_length -= upload_buffer_pages; + } + if (!successful) { + break; + } + } + if (!upload_regions_.empty()) { + assert_true(upload_buffer_previous != VK_NULL_HANDLE); + command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_, + uint32_t(upload_regions_.size()), + upload_regions_.data()); + upload_regions_.clear(); + } + return successful; +} + +void VulkanSharedMemory::GetBarrier(Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const { + switch (usage) { + case Usage::kComputeWrite: + stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask = VK_ACCESS_SHADER_READ_BIT; + return; + case Usage::kTransferDestination: + stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + return; + } + stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + if (provider.device_features().tessellationShader) { + stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + } + access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT; + switch (usage) { + case Usage::kRead: + stage_mask |= + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 
VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask |= VK_ACCESS_TRANSFER_READ_BIT; + break; + case Usage::kGuestDrawReadWrite: + access_mask |= VK_ACCESS_SHADER_WRITE_BIT; + break; + default: + assert_unhandled_case(usage); + } +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h new file mode 100644 index 000000000..dec2b8280 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -0,0 +1,97 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ +#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ + +#include +#include +#include +#include + +#include "xenia/gpu/shared_memory.h" +#include "xenia/gpu/trace_writer.h" +#include "xenia/memory.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class VulkanSharedMemory : public SharedMemory { + public: + VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory, + TraceWriter& trace_writer); + ~VulkanSharedMemory() override; + + bool Initialize(); + void Shutdown(bool from_destructor = false); + + void CompletedSubmissionUpdated(); + void EndSubmission(); + + enum class Usage { + // Index buffer, vfetch, compute read, transfer source. + kRead, + // Index buffer, vfetch, memexport. + kGuestDrawReadWrite, + kComputeWrite, + kTransferDestination, + }; + // Places pipeline barrier for the target usage, also ensuring writes of + // adjacent are ordered with writes of each other and reads. + void Use(Usage usage, std::pair written_range = {}); + + VkBuffer buffer() const { return buffer_; } + + protected: + bool EnsureHostGpuMemoryAllocated(uint32_t start, uint32_t length) override; + + bool UploadRanges(const std::vector>& + upload_page_ranges) override; + + private: + bool IsSparse() const { + return buffer_allocation_size_log2_ < kBufferSizeLog2; + } + + void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const; + + VulkanCommandProcessor& command_processor_; + TraceWriter& trace_writer_; + + VkBuffer buffer_ = VK_NULL_HANDLE; + uint32_t buffer_memory_type_; + // Maximum of 1024 allocations in the worst case for all of the buffer because + // of the overall 4096 allocation count limit on Windows drivers. + static constexpr uint32_t kMinBufferAllocationSizeLog2 = + std::max(kOptimalAllocationLog2, kBufferSizeLog2 - 10); + uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2; + // Sparse memory allocations, of different sizes. + std::vector buffer_memory_; + // One bit per every 2^buffer_allocation_size_log2_ of the buffer. + std::vector buffer_memory_allocated_; + + // First usage will likely be uploading. 
+  Usage last_usage_ = Usage::kTransferDestination;
+  std::pair<uint32_t, uint32_t> last_written_range_ = {};
+
+  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
+  std::vector<VkBufferCopy> upload_regions_;
+};
+
+}  // namespace vulkan
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc
index 6444af302..bbe90b04c 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.cc
+++ b/src/xenia/ui/vulkan/vulkan_provider.cc
@@ -538,6 +538,7 @@ bool VulkanProvider::Initialize() {
   XE_VULKAN_LOAD_DFN(vkCmdBindPipeline);
   XE_VULKAN_LOAD_DFN(vkCmdBindVertexBuffers);
   XE_VULKAN_LOAD_DFN(vkCmdClearColorImage);
+  XE_VULKAN_LOAD_DFN(vkCmdCopyBuffer);
   XE_VULKAN_LOAD_DFN(vkCmdCopyBufferToImage);
   XE_VULKAN_LOAD_DFN(vkCmdDraw);
   XE_VULKAN_LOAD_DFN(vkCmdDrawIndexed);
diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h
index 2e14b9398..8d7c10ed3 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.h
+++ b/src/xenia/ui/vulkan/vulkan_provider.h
@@ -35,6 +35,8 @@
 
 #define XELOGVK XELOGI
 
+#define XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES 1
+
 namespace xe {
 namespace ui {
 namespace vulkan {
@@ -139,6 +141,7 @@ class VulkanProvider : public GraphicsProvider {
     PFN_vkCmdBindPipeline vkCmdBindPipeline;
     PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers;
     PFN_vkCmdClearColorImage vkCmdClearColorImage;
+    PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
     PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
     PFN_vkCmdDraw vkCmdDraw;
     PFN_vkCmdDrawIndexed vkCmdDrawIndexed;
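
The patch records all Vulkan work into DeferredCommandBuffer and replays it into the real VkCommandBuffer only at the end of the submission (the Execute call in EndSubmission), so that recording can happen before dependencies such as async pipeline state object creation are resolved. The core of that class is the command-stream encoding: each recorded command is a CommandHeader followed by its argument struct, both rounded up to whole uintmax_t elements so that trailing data such as VkBufferCopy or barrier arrays stays suitably aligned. The following self-contained sketch illustrates that encoding pattern with simplified, invented command types; it is not the Xenia API itself.

```cpp
// Minimal sketch of the deferred-command-stream technique (illustrative
// names, not the Xenia classes).
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class Command : uint32_t { kSetValue };

struct CommandHeader {
  Command command;
  uint32_t arguments_size_elements;  // Argument block size in uintmax_t units.
};
constexpr size_t kHeaderElements =
    (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);

struct ArgsSetValue {
  uint32_t index;
  uint64_t value;
};

class DeferredStream {
 public:
  // Appends a header and reserves an aligned argument block, returning a
  // pointer the caller fills in - the same role WriteCommand plays above.
  void* Write(Command command, size_t arguments_size_bytes) {
    size_t arguments_size_elements =
        (arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
    size_t offset = stream_.size();
    stream_.resize(offset + kHeaderElements + arguments_size_elements);
    auto& header = *reinterpret_cast<CommandHeader*>(stream_.data() + offset);
    header.command = command;
    header.arguments_size_elements = uint32_t(arguments_size_elements);
    return stream_.data() + offset + kHeaderElements;
  }

  // Walks the stream and replays each command - the equivalent of Execute
  // translating the records into vkCmd* calls.
  void Execute() const {
    const uintmax_t* stream = stream_.data();
    size_t remaining = stream_.size();
    while (remaining) {
      const auto& header = *reinterpret_cast<const CommandHeader*>(stream);
      stream += kHeaderElements;
      remaining -= kHeaderElements;
      switch (header.command) {
        case Command::kSetValue: {
          const auto& args = *reinterpret_cast<const ArgsSetValue*>(stream);
          std::printf("SetValue(%u, %llu)\n", unsigned(args.index),
                      static_cast<unsigned long long>(args.value));
        } break;
      }
      stream += header.arguments_size_elements;
      remaining -= header.arguments_size_elements;
    }
  }

 private:
  std::vector<uintmax_t> stream_;  // uintmax_t keeps argument structs aligned.
};

int main() {
  DeferredStream stream;
  auto& args = *reinterpret_cast<ArgsSetValue*>(
      stream.Write(Command::kSetValue, sizeof(ArgsSetValue)));
  args.index = 3;
  args.value = 42;
  stream.Execute();  // Prints: SetValue(3, 42)
  return 0;
}
```

Keeping the stream as a single growable array means recording is an append plus a memcpy and replay is a linear walk, which is what allows a whole submission's worth of commands to be batched in CPU memory before the real command buffer is begun.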