vk: Reimplement events using synchronization2 extension

This commit is contained in:
kd-11 2023-06-22 00:19:03 +03:00 committed by kd-11
parent 00cca7be69
commit 850166eca1
6 changed files with 183 additions and 69 deletions

View File

@ -478,7 +478,7 @@ void VKGSRender::load_texture_env()
// Sync any async scheduler tasks
if (auto ev = async_task_scheduler.get_primary_sync_label())
{
ev->gpu_wait(*m_current_command_buffer);
ev->gpu_wait(*m_current_command_buffer, { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR });
}
}
}

View File

@ -191,9 +191,23 @@ namespace vk
src->pop_layout(cmd);
VkMemoryBarrier2KHR copy_memory_barrier = {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
.pNext = nullptr,
.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
.srcAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
.dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
.dstAccessMask = 0
};
// Create event object for this transfer and queue signal op
dma_fence = std::make_unique<vk::event>(*m_device, sync_domain::any);
dma_fence->signal(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
dma_fence->signal(cmd,
{
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.memoryBarrierCount = 1,
.pMemoryBarriers = &copy_memory_barrier
});
// Set cb flag for queued dma operations
cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);

View File

@ -113,6 +113,7 @@ namespace vk
optional_features_support.conditional_rendering = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
optional_features_support.external_memory_host = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
optional_features_support.sampler_mirror_clamped = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
optional_features_support.synchronization_2 = device_extensions.is_supported(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
optional_features_support.unrestricted_depth_range = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
optional_features_support.debug_utils = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
@ -486,6 +487,11 @@ namespace vk
requested_extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
}
if (pgpu->optional_features_support.synchronization_2)
{
requested_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
@ -670,6 +676,14 @@ namespace vk
device.pNext = &custom_border_color_features;
}
VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2_info{};
if (pgpu->optional_features_support.synchronization_2)
{
synchronization2_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES;
synchronization2_info.pNext = const_cast<void*>(device.pNext);
device.pNext = &synchronization2_info;
}
CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);
// Initialize queues
@ -695,6 +709,12 @@ namespace vk
_vkCmdInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkCmdInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkCmdInsertDebugUtilsLabelEXT"));
}
if (pgpu->optional_features_support.synchronization_2)
{
_vkCmdSetEvent2KHR = reinterpret_cast<PFN_vkCmdSetEvent2KHR>(vkGetDeviceProcAddr(dev, "vkCmdSetEvent2KHR"));
_vkCmdWaitEvents2KHR = reinterpret_cast<PFN_vkCmdWaitEvents2KHR>(vkGetDeviceProcAddr(dev, "vkCmdWaitEvents2KHR"));
}
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);

View File

@ -79,6 +79,7 @@ namespace vk
bool sampler_mirror_clamped = false;
bool shader_stencil_export = false;
bool surface_capabilities_2 = false;
bool synchronization_2 = false;
bool unrestricted_depth_range = false;
} optional_features_support;
@ -135,6 +136,8 @@ namespace vk
PFN_vkSetDebugUtilsObjectNameEXT _vkSetDebugUtilsObjectNameEXT = nullptr;
PFN_vkQueueInsertDebugUtilsLabelEXT _vkQueueInsertDebugUtilsLabelEXT = nullptr;
PFN_vkCmdInsertDebugUtilsLabelEXT _vkCmdInsertDebugUtilsLabelEXT = nullptr;
PFN_vkCmdSetEvent2KHR _vkCmdSetEvent2KHR = nullptr;
PFN_vkCmdWaitEvents2KHR _vkCmdWaitEvents2KHR = nullptr;
public:
render_device() = default;
@ -168,6 +171,7 @@ namespace vk
bool get_framebuffer_loops_support() const { return pgpu->optional_features_support.framebuffer_loops; }
bool get_barycoords_support() const { return pgpu->optional_features_support.barycentric_coords; }
bool get_custom_border_color_support() const { return pgpu->optional_features_support.custom_border_color; }
bool get_synchronization2_support() const { return pgpu->optional_features_support.synchronization_2; }
u64 get_descriptor_update_after_bind_support() const { return pgpu->descriptor_indexing_support.update_after_bind_mask; }
u32 get_descriptor_max_draw_calls() const { return pgpu->descriptor_max_draw_calls; }

View File

@ -15,6 +15,106 @@
namespace vk
{
// Util
namespace v1_utils
{
// Collapses every source stage mask in a sync2 VkDependencyInfoKHR into a
// single legacy (synchronization1) stage mask, for the fallback path on
// drivers without VK_KHR_synchronization2.
// The accumulation is done in the 64-bit sync2 type and narrowed exactly once:
// sync1 stage bits occupy the low 32 bits of their sync2 equivalents, so the
// final cast is lossless for any stage expressible in sync1.
VkPipelineStageFlags gather_src_stages(const VkDependencyInfoKHR& dependency)
{
	VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_NONE_KHR;
	for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
	{
		stages |= dependency.pBufferMemoryBarriers[i].srcStageMask;
	}
	for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
	{
		stages |= dependency.pImageMemoryBarriers[i].srcStageMask;
	}
	for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
	{
		stages |= dependency.pMemoryBarriers[i].srcStageMask;
	}
	return static_cast<VkPipelineStageFlags>(stages);
}
// Collapses every destination stage mask in a sync2 VkDependencyInfoKHR into
// a single legacy (synchronization1) stage mask, for the fallback path on
// drivers without VK_KHR_synchronization2.
// As in gather_src_stages, accumulate in the 64-bit sync2 type and narrow
// once at the end; sync1 stage bits are the low 32 bits of their sync2
// counterparts, so the cast is lossless for sync1-expressible stages.
VkPipelineStageFlags gather_dst_stages(const VkDependencyInfoKHR& dependency)
{
	VkPipelineStageFlags2KHR stages = VK_PIPELINE_STAGE_2_NONE_KHR;
	for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
	{
		stages |= dependency.pBufferMemoryBarriers[i].dstStageMask;
	}
	for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
	{
		stages |= dependency.pImageMemoryBarriers[i].dstStageMask;
	}
	for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
	{
		stages |= dependency.pMemoryBarriers[i].dstStageMask;
	}
	return static_cast<VkPipelineStageFlags>(stages);
}
auto get_memory_barriers(const VkDependencyInfoKHR& dependency)
{
std::vector<VkMemoryBarrier> result;
for (u32 i = 0; i < dependency.memoryBarrierCount; ++i)
{
result.emplace_back
(
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
nullptr,
static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].srcAccessMask),
static_cast<VkAccessFlags>(dependency.pMemoryBarriers[i].dstAccessMask)
);
}
return result;
}
auto get_image_memory_barriers(const VkDependencyInfoKHR& dependency)
{
std::vector<VkImageMemoryBarrier> result;
for (u32 i = 0; i < dependency.imageMemoryBarrierCount; ++i)
{
result.emplace_back
(
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
nullptr,
static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].srcAccessMask),
static_cast<VkAccessFlags>(dependency.pImageMemoryBarriers[i].dstAccessMask),
dependency.pImageMemoryBarriers[i].oldLayout,
dependency.pImageMemoryBarriers[i].newLayout,
dependency.pImageMemoryBarriers[i].srcQueueFamilyIndex,
dependency.pImageMemoryBarriers[i].dstQueueFamilyIndex,
dependency.pImageMemoryBarriers[i].image,
dependency.pImageMemoryBarriers[i].subresourceRange
);
}
return result;
}
auto get_buffer_memory_barriers(const VkDependencyInfoKHR& dependency)
{
std::vector<VkBufferMemoryBarrier> result;
for (u32 i = 0; i < dependency.bufferMemoryBarrierCount; ++i)
{
result.emplace_back
(
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].srcAccessMask),
static_cast<VkAccessFlags>(dependency.pBufferMemoryBarriers[i].dstAccessMask),
dependency.pBufferMemoryBarriers[i].srcQueueFamilyIndex,
dependency.pBufferMemoryBarriers[i].dstQueueFamilyIndex,
dependency.pBufferMemoryBarriers[i].buffer,
dependency.pBufferMemoryBarriers[i].offset,
dependency.pBufferMemoryBarriers[i].size
);
}
return result;
}
}
// Objects
fence::fence(VkDevice dev)
{
owner = dev;
@ -75,101 +175,78 @@ namespace vk
}
event::event(const render_device& dev, sync_domain domain)
: m_device(dev)
: m_device(&dev), v2(dev.get_synchronization2_support())
{
const auto vendor = dev.gpu().get_driver_vendor();
if (domain != sync_domain::gpu &&
(vendor == vk::driver_vendor::AMD || vendor == vk::driver_vendor::INTEL))
VkEventCreateInfo info
{
// Work around AMD and INTEL broken event signal synchronization scope
// Will be dropped after transitioning to VK1.3
m_buffer = std::make_unique<buffer>
(
dev,
4,
dev.get_memory_mapping().host_visible_coherent,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
0,
VMM_ALLOCATION_POOL_SYSTEM
);
m_value = reinterpret_cast<u32*>(m_buffer->map(0, 4));
*m_value = 0xCAFEBABE;
}
else
{
VkEventCreateInfo info
{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,
.flags = 0
};
vkCreateEvent(dev, &info, nullptr, &m_vk_event);
}
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,
.flags = 0
};
CHECK_RESULT(vkCreateEvent(dev, &info, nullptr, &m_vk_event));
}
event::~event()
{
if (m_vk_event) [[likely]]
{
vkDestroyEvent(m_device, m_vk_event, nullptr);
}
else
{
m_buffer->unmap();
m_buffer.reset();
m_value = nullptr;
vkDestroyEvent(*m_device, m_vk_event, nullptr);
}
}
void event::signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access)
void event::signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency)
{
if (m_vk_event) [[likely]]
if (v2) [[ likely ]]
{
vkCmdSetEvent(cmd, m_vk_event, stages);
m_device->_vkCmdSetEvent2KHR(cmd, m_vk_event, &dependency);
}
else
{
insert_global_memory_barrier(cmd, stages, VK_PIPELINE_STAGE_TRANSFER_BIT, access, VK_ACCESS_TRANSFER_WRITE_BIT);
vkCmdFillBuffer(cmd, m_buffer->value, 0, 4, 0xDEADBEEF);
// Legacy fallback. Should be practically unused with the exception of in-development drivers.
const auto stages = v1_utils::gather_src_stages(dependency);
vkCmdSetEvent(cmd, m_vk_event, stages);
}
}
void event::host_signal() const
{
ensure(m_vk_event);
vkSetEvent(m_device, m_vk_event);
vkSetEvent(*m_device, m_vk_event);
}
void event::gpu_wait(const command_buffer& cmd) const
void event::gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const
{
ensure(m_vk_event);
vkCmdWaitEvents(cmd, 1, &m_vk_event, 0, 0, 0, nullptr, 0, nullptr, 0, nullptr);
if (v2) [[ likely ]]
{
m_device->_vkCmdWaitEvents2KHR(cmd, 1, &m_vk_event, &dependency);
}
else
{
const auto src_stages = v1_utils::gather_src_stages(dependency);
const auto dst_stages = v1_utils::gather_dst_stages(dependency);
const auto memory_barriers = v1_utils::get_memory_barriers(dependency);
const auto image_memory_barriers = v1_utils::get_image_memory_barriers(dependency);
const auto buffer_memory_barriers = v1_utils::get_buffer_memory_barriers(dependency);
vkCmdWaitEvents(cmd,
1, &m_vk_event,
src_stages, dst_stages,
::size32(memory_barriers), memory_barriers.data(),
::size32(buffer_memory_barriers), buffer_memory_barriers.data(),
::size32(image_memory_barriers), image_memory_barriers.data());
}
}
void event::reset() const
{
if (m_vk_event) [[likely]]
{
vkResetEvent(m_device, m_vk_event);
}
else
{
*m_value = 0xCAFEBABE;
}
vkResetEvent(*m_device, m_vk_event);
}
VkResult event::status() const
{
if (m_vk_event) [[likely]]
{
return vkGetEventStatus(m_device, m_vk_event);
}
else
{
return (*m_value == 0xCAFEBABE) ? VK_EVENT_RESET : VK_EVENT_SET;
}
return vkGetEventStatus(*m_device, m_vk_event);
}
gpu_debug_marker_pool::gpu_debug_marker_pool(const vk::render_device& dev, u32 count)

View File

@ -9,6 +9,7 @@
namespace vk
{
class command_buffer;
class image;
enum class sync_domain
{
@ -54,20 +55,18 @@ namespace vk
class event
{
VkDevice m_device = VK_NULL_HANDLE;
const vk::render_device* m_device = nullptr;
VkEvent m_vk_event = VK_NULL_HANDLE;
std::unique_ptr<buffer> m_buffer;
volatile u32* m_value = nullptr;
bool v2 = true;
public:
event(const render_device& dev, sync_domain domain);
~event();
event(const event&) = delete;
void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
void signal(const command_buffer& cmd, const VkDependencyInfoKHR& dependency);
void host_signal() const;
void gpu_wait(const command_buffer& cmd) const;
void gpu_wait(const command_buffer& cmd, const VkDependencyInfoKHR& dependency) const;
VkResult status() const;
void reset() const;
};