[Vulkan] Basic shared memory uploading

Triang3l 2020-10-04 21:56:54 +03:00
parent 1fd0d48066
commit 2c50c670d8
9 changed files with 810 additions and 12 deletions

src/xenia/gpu/shared_memory.h (View File)

@@ -123,7 +123,9 @@ class SharedMemory {
// successfully uploaded range as early as possible, before the memcpy, to
// make sure invalidation that happened during the CPU -> GPU memcpy isn't
// missed (upload_page_ranges is in pages because of this - MarkRangeValid has
-// page granularity).
+// page granularity). upload_page_ranges are sorted in ascending address
+// order, so front and back can be used to determine the overall bounds of
+// pages to be uploaded.
virtual bool UploadRanges(
const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) = 0;
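Since upload_page_ranges is sorted, the overall bounds mentioned in the new
comment fall out of the first and last entries. A minimal sketch of that
bounds computation (GetOverallPageBounds is a hypothetical helper, not part
of this commit):

#include <cstdint>
#include <utility>
#include <vector>

// Each range is {start page, page count}; with ranges sorted by ascending
// start, the union's bounds come from the first start and the last end.
std::pair<uint32_t, uint32_t> GetOverallPageBounds(
    const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
  if (upload_page_ranges.empty()) {
    return {0, 0};
  }
  uint32_t first_page = upload_page_ranges.front().first;
  uint32_t end_page =
      upload_page_ranges.back().first + upload_page_ranges.back().second;
  return {first_page, end_page - first_page};
}

This is the same arithmetic VulkanSharedMemory::UploadRanges below performs
(in bytes, via page_size_log2()) for the written range it passes to Use().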

src/xenia/gpu/vulkan/deferred_command_buffer.cc (View File)

@@ -0,0 +1,208 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
namespace xe {
namespace gpu {
namespace vulkan {
DeferredCommandBuffer::DeferredCommandBuffer(
const VulkanCommandProcessor& command_processor, size_t initial_size)
: command_processor_(command_processor) {
command_stream_.reserve(initial_size / sizeof(uintmax_t));
}
void DeferredCommandBuffer::Reset() { command_stream_.clear(); }
void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn =
command_processor_.GetVulkanContext().GetVulkanProvider().dfn();
const uintmax_t* stream = command_stream_.data();
size_t stream_remaining = command_stream_.size();
while (stream_remaining) {
const CommandHeader& header =
*reinterpret_cast<const CommandHeader*>(stream);
stream += kCommandHeaderSizeElements;
stream_remaining -= kCommandHeaderSizeElements;
switch (header.command) {
case Command::kVkBindIndexBuffer: {
auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream);
dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset,
args.index_type);
} break;
case Command::kVkCopyBuffer: {
auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream);
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
dfn.vkCmdCopyBuffer(
command_buffer, args.src_buffer, args.dst_buffer, args.region_count,
reinterpret_cast<const VkBufferCopy*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
} break;
case Command::kVkPipelineBarrier: {
auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream);
size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier);
const VkMemoryBarrier* memory_barriers;
if (args.memory_barrier_count) {
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier));
memory_barriers = reinterpret_cast<const VkMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkMemoryBarrier) * args.memory_barrier_count;
} else {
memory_barriers = nullptr;
}
const VkBufferMemoryBarrier* buffer_memory_barriers;
if (args.buffer_memory_barrier_count) {
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers =
reinterpret_cast<const VkBufferMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) +
barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count;
} else {
buffer_memory_barriers = nullptr;
}
const VkImageMemoryBarrier* image_memory_barriers;
if (args.image_memory_barrier_count) {
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier));
image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count;
} else {
image_memory_barriers = nullptr;
}
dfn.vkCmdPipelineBarrier(
command_buffer, args.src_stage_mask, args.dst_stage_mask,
args.dependency_flags, args.memory_barrier_count, memory_barriers,
args.buffer_memory_barrier_count, buffer_memory_barriers,
args.image_memory_barrier_count, image_memory_barriers);
} break;
default:
assert_unhandled_case(header.command);
break;
}
stream += header.arguments_size_elements;
stream_remaining -= header.arguments_size_elements;
}
}
void DeferredCommandBuffer::CmdVkPipelineBarrier(
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags, uint32_t memory_barrier_count,
const VkMemoryBarrier* memory_barriers,
uint32_t buffer_memory_barrier_count,
const VkBufferMemoryBarrier* buffer_memory_barriers,
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers) {
size_t arguments_size = sizeof(ArgsVkPipelineBarrier);
size_t memory_barriers_offset;
if (memory_barrier_count) {
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier));
memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count;
} else {
memory_barriers_offset = 0;
}
size_t buffer_memory_barriers_offset;
if (buffer_memory_barrier_count) {
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers_offset = arguments_size;
arguments_size +=
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count;
} else {
buffer_memory_barriers_offset = 0;
}
size_t image_memory_barriers_offset;
if (image_memory_barrier_count) {
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier));
image_memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count;
} else {
image_memory_barriers_offset = 0;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkPipelineBarrier, arguments_size));
auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr);
args.src_stage_mask = src_stage_mask;
args.dst_stage_mask = dst_stage_mask;
args.dependency_flags = dependency_flags;
args.memory_barrier_count = memory_barrier_count;
args.buffer_memory_barrier_count = buffer_memory_barrier_count;
args.image_memory_barrier_count = image_memory_barrier_count;
if (memory_barrier_count) {
std::memcpy(args_ptr + memory_barriers_offset, memory_barriers,
sizeof(VkMemoryBarrier) * memory_barrier_count);
}
if (buffer_memory_barrier_count) {
std::memcpy(args_ptr + buffer_memory_barriers_offset,
buffer_memory_barriers,
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count);
}
if (image_memory_barrier_count) {
std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers,
sizeof(VkImageMemoryBarrier) * image_memory_barrier_count);
}
}
void* DeferredCommandBuffer::WriteCommand(Command command,
size_t arguments_size_bytes) {
size_t arguments_size_elements =
(arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
size_t offset = command_stream_.size();
command_stream_.resize(offset + kCommandHeaderSizeElements +
arguments_size_elements);
CommandHeader& header =
*reinterpret_cast<CommandHeader*>(command_stream_.data() + offset);
header.command = command;
header.arguments_size_elements = uint32_t(arguments_size_elements);
return command_stream_.data() + (offset + kCommandHeaderSizeElements);
}
} // namespace vulkan
} // namespace gpu
} // namespace xe
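For reference while reading Execute and WriteCommand above: the stream is an
array of uintmax_t elements, and every header or argument payload is padded
to a whole element count so each structure stays naturally aligned. A
standalone sketch of that rounding (BytesToStreamElements is a hypothetical
name for the expression WriteCommand inlines; variable-size payloads such as
VkBufferCopy[] and the three barrier arrays additionally get xe::align
applied per array within the payload):

#include <cstddef>
#include <cstdint>

// Round a byte size up to whole uintmax_t stream elements.
constexpr size_t BytesToStreamElements(size_t size_bytes) {
  return (size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
}
static_assert(BytesToStreamElements(1) == 1);
static_assert(BytesToStreamElements(sizeof(uintmax_t)) == 1);
static_assert(BytesToStreamElements(sizeof(uintmax_t) + 1) == 2);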

src/xenia/gpu/vulkan/deferred_command_buffer.h (View File)

@@ -0,0 +1,120 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class DeferredCommandBuffer {
public:
DeferredCommandBuffer(const VulkanCommandProcessor& command_processor,
size_t initial_size_bytes = 1024 * 1024);
void Reset();
void Execute(VkCommandBuffer command_buffer);
void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
VkIndexType index_type) {
auto& args = *reinterpret_cast<ArgsVkBindIndexBuffer*>(WriteCommand(
Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer)));
args.buffer = buffer;
args.offset = offset;
args.index_type = index_type;
}
void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
uint32_t region_count, const VkBufferCopy* regions) {
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
const size_t header_size =
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkCopyBuffer,
header_size + sizeof(VkBufferCopy) * region_count));
auto& args = *reinterpret_cast<ArgsVkCopyBuffer*>(args_ptr);
args.src_buffer = src_buffer;
args.dst_buffer = dst_buffer;
args.region_count = region_count;
std::memcpy(args_ptr + header_size, regions,
sizeof(VkBufferCopy) * region_count);
}
// pNext of all barriers must be null.
void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask,
VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags,
uint32_t memory_barrier_count,
const VkMemoryBarrier* memory_barriers,
uint32_t buffer_memory_barrier_count,
const VkBufferMemoryBarrier* buffer_memory_barriers,
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers);
private:
enum class Command {
kVkBindIndexBuffer,
kVkCopyBuffer,
kVkPipelineBarrier,
};
struct CommandHeader {
Command command;
uint32_t arguments_size_elements;
};
static constexpr size_t kCommandHeaderSizeElements =
(sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
struct ArgsVkBindIndexBuffer {
VkBuffer buffer;
VkDeviceSize offset;
VkIndexType index_type;
};
struct ArgsVkCopyBuffer {
VkBuffer src_buffer;
VkBuffer dst_buffer;
uint32_t region_count;
// Followed by VkBufferCopy[].
};
struct ArgsVkPipelineBarrier {
VkPipelineStageFlags src_stage_mask;
VkPipelineStageFlags dst_stage_mask;
VkDependencyFlags dependency_flags;
uint32_t memory_barrier_count;
uint32_t buffer_memory_barrier_count;
uint32_t image_memory_barrier_count;
// Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[],
// VkImageMemoryBarrier[].
};
void* WriteCommand(Command command, size_t arguments_size_bytes);
const VulkanCommandProcessor& command_processor_;
// uintmax_t to ensure uint64_t and pointer alignment of all structures.
std::vector<uintmax_t> command_stream_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
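A usage sketch of the interface above (the setup around it is assumed; in
this commit the equivalent calls live in VulkanCommandProcessor):

// Hypothetical outline: record cheaply while a submission is open, then
// replay into the real VkCommandBuffer once at submission end.
void RecordAndReplay(xe::gpu::vulkan::DeferredCommandBuffer& deferred,
                     VkBuffer index_buffer,
                     VkCommandBuffer real_command_buffer) {
  deferred.Reset();  // A submission opens with an empty stream.
  deferred.CmdVkBindIndexBuffer(index_buffer, 0, VK_INDEX_TYPE_UINT16);
  // ... more Cmd* calls recorded, no Vulkan entry points touched yet ...
  deferred.Execute(real_command_buffer);  // Replay at submission end.
}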

src/xenia/gpu/vulkan/vulkan_command_processor.cc (View File)

@@ -15,6 +15,8 @@
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"
@@ -25,7 +27,9 @@ namespace vulkan {
VulkanCommandProcessor::VulkanCommandProcessor(
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
-    : CommandProcessor(graphics_system, kernel_state) {}
+    : CommandProcessor(graphics_system, kernel_state),
+      deferred_command_buffer_(*this) {}
VulkanCommandProcessor::~VulkanCommandProcessor() = default;
void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
@@ -39,10 +43,12 @@ bool VulkanCommandProcessor::SetupContext() {
return false;
}
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
shared_memory_ =
std::make_unique<VulkanSharedMemory>(*this, *memory_, trace_writer_);
if (!shared_memory_->Initialize()) {
XELOGE("Failed to initialize shared memory");
return false;
}
return true;
}
@@ -50,11 +56,14 @@ bool VulkanCommandProcessor::SetupContext() {
void VulkanCommandProcessor::ShutdownContext() {
AwaitAllQueueOperationsCompletion();
shared_memory_.reset();
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
deferred_command_buffer_.Reset();
for (const auto& command_buffer_pair : command_buffers_submitted_) {
dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr);
}
@@ -119,19 +128,46 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info,
bool major_mode_explicit) {
-#if FINE_GRAINED_DRAW_SCOPES
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
-#endif  // FINE_GRAINED_DRAW_SCOPES
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
BeginSubmission(true);
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
// Actually draw.
if (indexed) {
uint32_t index_size =
index_buffer_info->format == xenos::IndexFormat::kInt32
? sizeof(uint32_t)
: sizeof(uint16_t);
assert_false(index_buffer_info->guest_base & (index_size - 1));
uint32_t index_base =
index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1);
uint32_t index_buffer_size = index_buffer_info->count * index_size;
if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
XELOGE(
"Failed to request index buffer at 0x{:08X} (size {}) in the shared "
"memory",
index_base, index_buffer_size);
return false;
}
deferred_command_buffer_.CmdVkBindIndexBuffer(
shared_memory_->buffer(), index_base,
index_buffer_info->format == xenos::IndexFormat::kInt32
? VK_INDEX_TYPE_UINT32
: VK_INDEX_TYPE_UINT16);
}
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
return true;
}
bool VulkanCommandProcessor::IssueCopy() {
-#if FINE_GRAINED_DRAW_SCOPES
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
-#endif  // FINE_GRAINED_DRAW_SCOPES
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
BeginSubmission(true);
@@ -217,12 +253,14 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
command_buffers_writable_.push_back(command_buffer_pair.first);
command_buffers_submitted_.pop_front();
}
shared_memory_->CompletedSubmissionUpdated();
}
void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
-#if FINE_GRAINED_DRAW_SCOPES
+#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
-#endif  // FINE_GRAINED_DRAW_SCOPES
+#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
bool is_opening_frame = is_guest_command && !frame_open_;
if (submission_open_ && !is_opening_frame) {
@@ -257,6 +295,11 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
if (!submission_open_) {
submission_open_ = true;
    // Start a new deferred command buffer - it will be submitted to the real
    // one at the end of the submission (when async pipeline state object
    // creation requests are fulfilled).
deferred_command_buffer_.Reset();
}
if (is_opening_frame) {
@@ -321,6 +364,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
bool is_closing_frame = is_swap && frame_open_;
if (submission_open_) {
shared_memory_->EndSubmission();
assert_false(command_buffers_writable_.empty());
CommandBuffer command_buffer = command_buffers_writable_.back();
if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
@@ -339,6 +384,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
XELOGE("Failed to begin a Vulkan command buffer");
return false;
}
-  // TODO(Triang3l): Write deferred command buffer commands.
+  deferred_command_buffer_.Execute(command_buffer.buffer);
if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
XELOGE("Failed to end a Vulkan command buffer");

src/xenia/gpu/vulkan/vulkan_command_processor.h (View File)

@@ -16,7 +16,9 @@
#include <vector>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/vulkan_context.h"
@@ -39,6 +41,13 @@ class VulkanCommandProcessor : public CommandProcessor {
return static_cast<ui::vulkan::VulkanContext&>(*context_);
}
// Returns the deferred drawing command list for the currently open
// submission.
DeferredCommandBuffer& deferred_command_buffer() {
assert_true(submission_open_);
return deferred_command_buffer_;
}
uint64_t GetCurrentSubmission() const {
return submission_completed_ +
uint64_t(submissions_in_flight_fences_.size()) + 1;
@@ -113,6 +122,9 @@ class VulkanCommandProcessor : public CommandProcessor {
};
std::vector<CommandBuffer> command_buffers_writable_;
std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
DeferredCommandBuffer deferred_command_buffer_;
std::unique_ptr<VulkanSharedMemory> shared_memory_;
};
} // namespace vulkan

src/xenia/gpu/vulkan/vulkan_shared_memory.cc (View File)

@@ -0,0 +1,309 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include <algorithm>
#include <cstring>
#include <utility>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace gpu {
namespace vulkan {
VulkanSharedMemory::VulkanSharedMemory(
VulkanCommandProcessor& command_processor, Memory& memory,
TraceWriter& trace_writer)
: SharedMemory(memory),
command_processor_(command_processor),
trace_writer_(trace_writer) {}
VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); }
bool VulkanSharedMemory::Initialize() {
InitializeCommon();
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
VkBufferCreateInfo buffer_create_info;
buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_create_info.pNext = nullptr;
buffer_create_info.flags = 0;
const VkBufferCreateFlags sparse_flags =
VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
// TODO(Triang3l): Sparse binding.
buffer_create_info.size = kBufferSize;
buffer_create_info.usage =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_create_info.queueFamilyIndexCount = 0;
buffer_create_info.pQueueFamilyIndices = nullptr;
VkResult buffer_create_result =
dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
if (buffer_create_result != VK_SUCCESS) {
if (buffer_create_info.flags & sparse_flags) {
buffer_create_info.flags &= ~sparse_flags;
buffer_create_result =
dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_);
}
if (buffer_create_result != VK_SUCCESS) {
XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
kBufferSize >> 20);
Shutdown();
return false;
}
}
VkMemoryRequirements buffer_memory_requirements;
dfn.vkGetBufferMemoryRequirements(device, buffer_,
&buffer_memory_requirements);
// TODO(Triang3l): Determine sparse binding properties from memory
// requirements.
if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
provider.memory_types_device_local(),
&buffer_memory_type_)) {
XELOGE(
"Shared memory: Failed to get a device-local Vulkan memory type for "
"the buffer");
Shutdown();
return false;
}
if (!(buffer_create_info.flags & sparse_flags)) {
VkMemoryAllocateInfo buffer_memory_allocate_info;
buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
buffer_memory_allocate_info.pNext = nullptr;
buffer_memory_allocate_info.allocationSize =
buffer_memory_requirements.size;
buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
VkDeviceMemory buffer_memory;
if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr,
&buffer_memory) != VK_SUCCESS) {
XELOGE(
"Shared memory: Failed to allocate {} MB of memory for the Vulkan "
"buffer",
kBufferSize >> 20);
Shutdown();
return false;
}
buffer_memory_.push_back(buffer_memory);
if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) !=
VK_SUCCESS) {
XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer");
Shutdown();
return false;
}
}
upload_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize,
size_t(1) << page_size_log2()));
return true;
}
void VulkanSharedMemory::Shutdown(bool from_destructor) {
upload_buffer_pool_.reset();
last_written_range_ = std::make_pair<uint32_t, uint32_t>(0, 0);
last_usage_ = Usage::kTransferDestination;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
buffer_memory_allocated_.clear();
for (VkDeviceMemory memory : buffer_memory_) {
dfn.vkFreeMemory(device, memory, nullptr);
}
buffer_memory_.clear();
// If calling from the destructor, the SharedMemory destructor will call
// ShutdownCommon.
if (!from_destructor) {
ShutdownCommon();
}
}
void VulkanSharedMemory::CompletedSubmissionUpdated() {
upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission());
}
void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); }
void VulkanSharedMemory::Use(Usage usage,
std::pair<uint32_t, uint32_t> written_range) {
written_range.first = std::min(written_range.first, kBufferSize);
written_range.second =
std::min(written_range.second, kBufferSize - written_range.first);
assert_true(usage != Usage::kRead || !written_range.second);
if (last_usage_ != usage || last_written_range_.second) {
VkPipelineStageFlags stage_mask_src, stage_mask_dst;
VkBufferMemoryBarrier buffer_memory_barrier;
GetBarrier(last_usage_, stage_mask_src,
buffer_memory_barrier.srcAccessMask);
GetBarrier(usage, stage_mask_dst, buffer_memory_barrier.dstAccessMask);
buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_memory_barrier.pNext = nullptr;
buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.buffer = buffer_;
if (last_usage_ == usage) {
// Committing the previous write.
buffer_memory_barrier.offset = VkDeviceSize(last_written_range_.first);
buffer_memory_barrier.size = VkDeviceSize(last_written_range_.second);
} else {
// Changing the stage and access mask - all preceding writes must be
// available not only to the source stage, but to the destination as well.
buffer_memory_barrier.offset = 0;
buffer_memory_barrier.size = VK_WHOLE_SIZE;
last_usage_ = usage;
}
command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1,
&buffer_memory_barrier, 0, nullptr);
}
last_written_range_ = written_range;
}
bool VulkanSharedMemory::EnsureHostGpuMemoryAllocated(uint32_t start,
uint32_t length) {
// TODO(Triang3l): Do sparse binding.
return true;
}
bool VulkanSharedMemory::UploadRanges(
const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
if (upload_page_ranges.empty()) {
return true;
}
// upload_page_ranges are sorted, use them to determine the range for the
// ordering barrier.
Use(Usage::kTransferDestination,
std::make_pair(
upload_page_ranges.front().first << page_size_log2(),
(upload_page_ranges.back().first + upload_page_ranges.back().second -
upload_page_ranges.front().first)
<< page_size_log2()));
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
uint64_t submission_current = command_processor_.GetCurrentSubmission();
bool successful = true;
upload_regions_.clear();
VkBuffer upload_buffer_previous = VK_NULL_HANDLE;
for (auto upload_range : upload_page_ranges) {
uint32_t upload_range_start = upload_range.first;
uint32_t upload_range_length = upload_range.second;
trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(),
upload_range_length << page_size_log2());
while (upload_range_length) {
VkBuffer upload_buffer;
VkDeviceSize upload_buffer_offset, upload_buffer_size;
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
submission_current, upload_range_length << page_size_log2(),
size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset,
upload_buffer_size);
if (upload_buffer_mapping == nullptr) {
XELOGE("Shared memory: Failed to get a Vulkan upload buffer");
successful = false;
break;
}
MakeRangeValid(upload_range_start << page_size_log2(),
uint32_t(upload_buffer_size), false);
std::memcpy(
upload_buffer_mapping,
memory().TranslatePhysical(upload_range_start << page_size_log2()),
upload_buffer_size);
if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) {
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
uint32_t(upload_regions_.size()),
upload_regions_.data());
upload_regions_.clear();
}
upload_buffer_previous = upload_buffer;
VkBufferCopy& upload_region = upload_regions_.emplace_back();
upload_region.srcOffset = upload_buffer_offset;
upload_region.dstOffset =
VkDeviceSize(upload_range_start << page_size_log2());
upload_region.size = upload_buffer_size;
uint32_t upload_buffer_pages =
uint32_t(upload_buffer_size >> page_size_log2());
upload_range_start += upload_buffer_pages;
upload_range_length -= upload_buffer_pages;
}
if (!successful) {
break;
}
}
if (!upload_regions_.empty()) {
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
uint32_t(upload_regions_.size()),
upload_regions_.data());
upload_regions_.clear();
}
return successful;
}
void VulkanSharedMemory::GetBarrier(Usage usage,
VkPipelineStageFlags& stage_mask,
VkAccessFlags& access_mask) const {
switch (usage) {
case Usage::kComputeWrite:
stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
// This is a write usage, so shader write access must be made available in
// addition to read.
access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
return;
case Usage::kTransferDestination:
stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
return;
}
stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
if (provider.device_features().tessellationShader) {
stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
}
access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
switch (usage) {
case Usage::kRead:
stage_mask |=
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
access_mask |= VK_ACCESS_TRANSFER_READ_BIT;
break;
case Usage::kGuestDrawReadWrite:
access_mask |= VK_ACCESS_SHADER_WRITE_BIT;
break;
default:
assert_unhandled_case(usage);
}
}
} // namespace vulkan
} // namespace gpu
} // namespace xe
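A hedged sketch of how the two barrier paths in Use above behave at a call
site (the sequence is illustrative, mirroring UploadRanges followed by a
draw):

void UploadThenRead(xe::gpu::vulkan::VulkanSharedMemory& shared_memory) {
  using Usage = xe::gpu::vulkan::VulkanSharedMemory::Usage;
  // Usage unchanged from the initial kTransferDestination state: only the
  // previously written range (if any) is committed with a ranged barrier.
  shared_memory.Use(Usage::kTransferDestination, {0, 4096});
  // Usage change: a whole-buffer (VK_WHOLE_SIZE) barrier makes all preceding
  // transfer writes visible to index fetch and shader reads.
  shared_memory.Use(Usage::kRead);
}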

src/xenia/gpu/vulkan/vulkan_shared_memory.h (View File)

@@ -0,0 +1,97 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "xenia/gpu/shared_memory.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class VulkanSharedMemory : public SharedMemory {
public:
VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory,
TraceWriter& trace_writer);
~VulkanSharedMemory() override;
bool Initialize();
void Shutdown(bool from_destructor = false);
void CompletedSubmissionUpdated();
void EndSubmission();
enum class Usage {
// Index buffer, vfetch, compute read, transfer source.
kRead,
// Index buffer, vfetch, memexport.
kGuestDrawReadWrite,
kComputeWrite,
kTransferDestination,
};
// Places a pipeline barrier for the target usage, also ensuring that writes
// in adjacent uses are ordered with each other and with reads.
void Use(Usage usage, std::pair<uint32_t, uint32_t> written_range = {});
VkBuffer buffer() const { return buffer_; }
protected:
bool EnsureHostGpuMemoryAllocated(uint32_t start, uint32_t length) override;
bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
upload_page_ranges) override;
private:
bool IsSparse() const {
return buffer_allocation_size_log2_ < kBufferSizeLog2;
}
void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask,
VkAccessFlags& access_mask) const;
VulkanCommandProcessor& command_processor_;
TraceWriter& trace_writer_;
VkBuffer buffer_ = VK_NULL_HANDLE;
uint32_t buffer_memory_type_;
// At most 1024 allocations for the whole buffer in the worst case, because
// of the overall 4096 allocation count limit on Windows drivers.
static constexpr uint32_t kMinBufferAllocationSizeLog2 =
std::max(kOptimalAllocationLog2, kBufferSizeLog2 - 10);
uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2;
// Sparse memory allocations, of different sizes.
std::vector<VkDeviceMemory> buffer_memory_;
// One bit per every 2^buffer_allocation_size_log2_ bytes of the buffer.
std::vector<uint64_t> buffer_memory_allocated_;
// First usage will likely be uploading.
Usage last_usage_ = Usage::kTransferDestination;
std::pair<uint32_t, uint32_t> last_written_range_ = {};
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
std::vector<VkBufferCopy> upload_regions_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
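The kMinBufferAllocationSizeLog2 bound above caps the worst-case sparse
allocation count. With assumed constants from the common SharedMemory code
(kBufferSizeLog2 = 29 for the 512 MB buffer, and ignoring the
kOptimalAllocationLog2 floor), the arithmetic works out as:

#include <cstdint>

// Illustrative constants - kBufferSizeLog2 is assumed to be 29 (512 MB).
constexpr uint32_t kBufferSizeLog2 = 29;
constexpr uint32_t kMinBufferAllocationSizeLog2 = kBufferSizeLog2 - 10;
// 2^10 = 1024 allocations at most for the whole buffer, comfortably under
// the roughly 4096 per-process allocation limit on Windows drivers.
static_assert((uint32_t(1) << (kBufferSizeLog2 -
                               kMinBufferAllocationSizeLog2)) == 1024);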

src/xenia/ui/vulkan/vulkan_provider.cc (View File)

@@ -538,6 +538,7 @@ bool VulkanProvider::Initialize() {
XE_VULKAN_LOAD_DFN(vkCmdBindPipeline);
XE_VULKAN_LOAD_DFN(vkCmdBindVertexBuffers);
XE_VULKAN_LOAD_DFN(vkCmdClearColorImage);
XE_VULKAN_LOAD_DFN(vkCmdCopyBuffer);
XE_VULKAN_LOAD_DFN(vkCmdCopyBufferToImage);
XE_VULKAN_LOAD_DFN(vkCmdDraw);
XE_VULKAN_LOAD_DFN(vkCmdDrawIndexed);
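XE_VULKAN_LOAD_DFN presumably resolves each device-level entry point through
vkGetDeviceProcAddr; its real definition is outside this diff. A generic
sketch of the pattern (LOAD_DEVICE_FN and its parameters are illustrative):

// Fetch a device-level function by name; the caller accumulates the result
// so initialization fails if any entry point is missing.
#define LOAD_DEVICE_FN(functions, device, get_device_proc_addr, name) \
  ((functions.name = reinterpret_cast<PFN_##name>(                    \
        get_device_proc_addr(device, #name))) != nullptr)
// Usage: all_loaded &= LOAD_DEVICE_FN(dfn, device, vkGetDeviceProcAddr,
//                                     vkCmdCopyBuffer);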

src/xenia/ui/vulkan/vulkan_provider.h (View File)

@@ -35,6 +35,8 @@
#define XELOGVK XELOGI
#define XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES 1
namespace xe {
namespace ui {
namespace vulkan {
@@ -139,6 +141,7 @@ class VulkanProvider : public GraphicsProvider {
PFN_vkCmdBindPipeline vkCmdBindPipeline;
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers;
PFN_vkCmdClearColorImage vkCmdClearColorImage;
PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
PFN_vkCmdDraw vkCmdDraw;
PFN_vkCmdDrawIndexed vkCmdDrawIndexed;