diff --git a/cores/libretro-test-vulkan/libretro-test.c b/cores/libretro-test-vulkan/libretro-test.c index c2ec3b7ef7..3a226609da 100644 --- a/cores/libretro-test-vulkan/libretro-test.c +++ b/cores/libretro-test-vulkan/libretro-test.c @@ -967,7 +967,7 @@ void retro_run(void) vk.index = vulkan->get_sync_index(vulkan->handle); vulkan_test_render(); - vulkan->set_image(vulkan->handle, &vk.images[vk.index], 0, NULL); + vulkan->set_image(vulkan->handle, &vk.images[vk.index], 0, NULL, VK_QUEUE_FAMILY_IGNORED); vulkan->set_command_buffers(vulkan->handle, 1, &vk.cmd[vk.index]); video_cb(RETRO_HW_FRAME_BUFFER_VALID, BASE_WIDTH, BASE_HEIGHT, 0); } diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index 154c793a6b..ea671a95be 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -104,6 +104,31 @@ uint32_t vulkan_find_memory_type_fallback( device_reqs, host_reqs_second, 0); } +void vulkan_transfer_image_ownership(VkCommandBuffer cmd, + VkImage image, VkImageLayout layout, + VkPipelineStageFlags src_stages, + VkPipelineStageFlags dst_stages, + uint32_t src_queue_family, + uint32_t dst_queue_family) +{ + VkImageMemoryBarrier barrier = + { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; + + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = layout; + barrier.newLayout = layout; + barrier.srcQueueFamilyIndex = src_queue_family; + barrier.dstQueueFamilyIndex = dst_queue_family; + barrier.image = image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VKFUNC(vkCmdPipelineBarrier)(cmd, src_stages, dst_stages, + false, 0, NULL, 0, NULL, 1, &barrier); +} + void vulkan_map_persistent_texture( VkDevice device, struct vk_texture *texture) diff --git a/gfx/common/vulkan_common.h b/gfx/common/vulkan_common.h index 8a5bfe8bdd..e9dc4ad745 100644 --- a/gfx/common/vulkan_common.h 
+++ b/gfx/common/vulkan_common.h @@ -358,8 +358,10 @@ typedef struct vk unsigned last_width; unsigned last_height; + uint32_t src_queue_family; bool enable; + bool valid_semaphore; } hw; struct @@ -393,6 +395,13 @@ struct vk_texture vulkan_create_texture(vk_t *vk, void vulkan_transition_texture(vk_t *vk, struct vk_texture *texture); +void vulkan_transfer_image_ownership(VkCommandBuffer cmd, + VkImage image, VkImageLayout layout, + VkPipelineStageFlags src_stages, + VkPipelineStageFlags dst_stages, + uint32_t src_queue_family, + uint32_t dst_queue_family); + void vulkan_map_persistent_texture( VkDevice device, struct vk_texture *texture); diff --git a/gfx/drivers/vulkan.c b/gfx/drivers/vulkan.c index ac0cb764d8..c080747c9b 100644 --- a/gfx/drivers/vulkan.c +++ b/gfx/drivers/vulkan.c @@ -833,7 +833,8 @@ static uint32_t vulkan_get_sync_index_mask(void *handle) static void vulkan_set_image(void *handle, const struct retro_vulkan_image *image, uint32_t num_semaphores, - const VkSemaphore *semaphores) + const VkSemaphore *semaphores, + uint32_t src_queue_family) { unsigned i; vk_t *vk = (vk_t*)handle; @@ -853,6 +854,9 @@ static void vulkan_set_image(void *handle, for (i = 0; i < vk->hw.num_semaphores; i++) vk->hw.wait_dst_stages[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + vk->hw.valid_semaphore = true; + vk->hw.src_queue_family = src_queue_family; } } @@ -1446,6 +1450,7 @@ static bool vulkan_frame(void *data, const void *frame, static struct retro_perf_counter copy_frame = {0}; static struct retro_perf_counter swapbuffers = {0}; static struct retro_perf_counter queue_submit = {0}; + bool waits_for_semaphores = false; VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; @@ -1488,6 +1493,26 @@ static bool vulkan_frame(void *data, const void *frame, vulkan_flush_caches(vk); + waits_for_semaphores = vk->hw.enable && frame && + !vk->hw.num_cmd && vk->hw.valid_semaphore; + + if (waits_for_semaphores && + vk->hw.src_queue_family != 
VK_QUEUE_FAMILY_IGNORED && + vk->hw.src_queue_family != vk->context->graphics_queue_index) + { + retro_assert(vk->hw.image); + + /* Acquire ownership of image from other queue family. */ + vulkan_transfer_image_ownership(vk->cmd, + vk->hw.image->create_info.image, + vk->hw.image->image_layout, + /* Create a dependency chain from semaphore wait. */ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT, + vk->hw.src_queue_family, vk->context->graphics_queue_index); + } + /* Upload texture */ performance_counter_start(&copy_frame); if (frame && !vk->hw.enable) @@ -1717,6 +1742,21 @@ static bool vulkan_frame(void *data, const void *frame, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); } + if (waits_for_semaphores && + vk->hw.src_queue_family != VK_QUEUE_FAMILY_IGNORED && + vk->hw.src_queue_family != vk->context->graphics_queue_index) + { + retro_assert(vk->hw.image); + + /* Release ownership of image back to other queue family. */ + vulkan_transfer_image_ownership(vk->cmd, + vk->hw.image->create_info.image, + vk->hw.image->image_layout, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + vk->context->graphics_queue_index, vk->hw.src_queue_family); + } + performance_counter_start(&end_cmd); VKFUNC(vkEndCommandBuffer)(vk->cmd); performance_counter_stop(&end_cmd); @@ -1739,11 +1779,14 @@ static bool vulkan_frame(void *data, const void *frame, submit_info.pCommandBuffers = &vk->cmd; } - if (vk->hw.enable && frame && !vk->hw.num_cmd) + if (waits_for_semaphores) { submit_info.waitSemaphoreCount = vk->hw.num_semaphores; submit_info.pWaitSemaphores = vk->hw.semaphores; submit_info.pWaitDstStageMask = vk->hw.wait_dst_stages; + + /* Consume the semaphores. 
*/ + vk->hw.valid_semaphore = false; } submit_info.signalSemaphoreCount = diff --git a/libretro-common/include/libretro.h b/libretro-common/include/libretro.h index d5c3876fdc..e6809262a8 100644 --- a/libretro-common/include/libretro.h +++ b/libretro-common/include/libretro.h @@ -955,6 +955,27 @@ struct retro_hw_render_interface * This must be called before the first call to retro_run. */ +enum retro_hw_render_context_negotiation_interface_type +{ + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_context_negotiation_interface_* types + * contain at least these fields. */ +struct retro_hw_render_context_negotiation_interface +{ + enum retro_hw_render_context_negotiation_interface_type interface_type; + unsigned interface_version; +}; +#define RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE (43 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_context_negotiation_interface * -- + * Sets an interface which lets the libretro core negotiate with the frontend how a context is created. + * The semantics of this interface depend on which API is used in SET_HW_RENDER earlier. + * This interface will be used when the frontend is trying to create a HW rendering context, + * so it will be used after SET_HW_RENDER, but before the context_reset callback. + */ + #define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ #define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ #define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. 
*/ diff --git a/libretro-common/include/libretro_vulkan.h b/libretro-common/include/libretro_vulkan.h index 55375e1399..b761e0afea 100644 --- a/libretro-common/include/libretro_vulkan.h +++ b/libretro-common/include/libretro_vulkan.h @@ -26,7 +26,7 @@ #include #include -#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 2 +#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 3 struct retro_vulkan_image { @@ -38,7 +38,9 @@ struct retro_vulkan_image typedef void (*retro_vulkan_set_image_t)(void *handle, const struct retro_vulkan_image *image, uint32_t num_semaphores, - const VkSemaphore *semaphores); + const VkSemaphore *semaphores, + uint32_t src_queue_family); + typedef uint32_t (*retro_vulkan_get_sync_index_t)(void *handle); typedef uint32_t (*retro_vulkan_get_sync_index_mask_t)(void *handle); typedef void (*retro_vulkan_set_command_buffers_t)(void *handle, @@ -105,6 +107,15 @@ struct retro_hw_render_interface_vulkan * semaphores provided to be signaled before using the results further * in the pipeline. * + * Semaphores provided by a single call to set_image will only be + * waited for once (waiting for a semaphore resets it). + * E.g. set_image, video_refresh, and then another + * video_refresh without set_image, + * but same image will only wait for semaphores once. + * + * For this reason, ownership transfer will only occur if semaphores + * are waited on for a particular frame in the frontend. + * * Using semaphores is optional for synchronization purposes, * but if not using * semaphores, an image memory barrier in vkCmdPipelineBarrier @@ -163,7 +174,41 @@ struct retro_hw_render_interface_vulkan * retro_video_refresh_t should be extended if frame duping is used * so that the frontend can reuse the older pointer. * - * If frame duping is used, the frontend will not wait for any semaphores. + * The image itself however, must not be touched by the core until + * wait_sync_index has been completed later. 
The frontend may perform + * layout transitions on the image, so even read-only access is not defined. + * The exception to the read-only rule is if GENERAL layout is used for the image. + * In this case, the frontend is not allowed to perform any layout transitions, + * so concurrent reads from core and frontend are allowed. + * + * If frame duping is used, or if set_command_buffers is used, + * the frontend will not wait for any semaphores. + * + * The src_queue_family is used to specify which queue family + * the image is currently owned by. If using multiple queue families + * (e.g. async compute), the frontend will need to acquire ownership of the + * image before rendering with it and release the image afterwards. + * + * If src_queue_family is equal to the queue family (queue_index), + * no ownership transfer will occur. + * Similarly, if src_queue_family is VK_QUEUE_FAMILY_IGNORED, + * no ownership transfer will occur. + * + * The frontend will always release ownership back to src_queue_family. + * Waiting for the frontend to complete with wait_sync_index() ensures that + * the frontend has released ownership back to the application. + * Note that in Vulkan, transferring ownership is a two-part process. + * + * Example frame: + * - core releases ownership from src_queue_family to queue_index with VkImageMemoryBarrier. + * - core calls set_image with src_queue_family. + * - Frontend will acquire the image with src_queue_family -> queue_index as well, completing the ownership transfer. + * - Frontend renders the frame. + * - Frontend releases ownership with queue_index -> src_queue_family. + * - Next time image is used, core must acquire ownership from queue_index ... + * + * Since the frontend releases ownership, we cannot necessarily dupe the frame because + * the core needs to make the roundtrip of ownership transfer. */ retro_vulkan_set_image_t set_image;