diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index c52d3e1d82..88b8dda6b7 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -1746,6 +1746,22 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk) vk->emulate_mailbox = vk->fullscreen; #endif + /* If we're emulating mailbox, stick to using fences rather than semaphores. + * Avoids some really weird driver bugs. */ + if (!vk->emulate_mailbox) + { + if (vk->context.gpu_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) + { + vk->use_wsi_semaphore = true; + RARCH_LOG("[Vulkan]: Using semaphores for WSI acquire.\n"); + } + else + { + vk->use_wsi_semaphore = false; + RARCH_LOG("[Vulkan]: Using fences for WSI acquire.\n"); + } + } + RARCH_LOG("[Vulkan]: Using GPU: %s\n", vk->context.gpu_properties.deviceName); { @@ -2555,10 +2571,28 @@ static void vulkan_destroy_swapchain(gfx_ctx_vulkan_data_t *vk) if (vk->context.swapchain_fences[i] != VK_NULL_HANDLE) vkDestroyFence(vk->context.device, vk->context.swapchain_fences[i], NULL); + if (vk->context.swapchain_recycled_semaphores[i] != VK_NULL_HANDLE) + vkDestroySemaphore(vk->context.device, + vk->context.swapchain_recycled_semaphores[i], NULL); + if (vk->context.swapchain_wait_semaphores[i] != VK_NULL_HANDLE) + vkDestroySemaphore(vk->context.device, + vk->context.swapchain_wait_semaphores[i], NULL); } - memset(vk->context.swapchain_semaphores, 0, sizeof(vk->context.swapchain_semaphores)); - memset(vk->context.swapchain_fences, 0, sizeof(vk->context.swapchain_fences)); + if (vk->context.swapchain_acquire_semaphore != VK_NULL_HANDLE) + vkDestroySemaphore(vk->context.device, + vk->context.swapchain_acquire_semaphore, NULL); + vk->context.swapchain_acquire_semaphore = VK_NULL_HANDLE; + + memset(vk->context.swapchain_semaphores, 0, + sizeof(vk->context.swapchain_semaphores)); + memset(vk->context.swapchain_recycled_semaphores, 0, + sizeof(vk->context.swapchain_recycled_semaphores)); + memset(vk->context.swapchain_wait_semaphores, 0, + sizeof(vk->context.swapchain_wait_semaphores)); + memset(vk->context.swapchain_fences, 0, + sizeof(vk->context.swapchain_fences)); + vk->context.num_recycled_acquire_semaphores = 0; } void vulkan_present(gfx_ctx_vulkan_data_t *vk, unsigned index) @@ -2673,6 +2707,12 @@ void vulkan_context_destroy(gfx_ctx_vulkan_data_t *vk, } } +static void vulkan_recycle_acquire_semaphore(struct vulkan_context *ctx, VkSemaphore sem) +{ + assert(ctx->num_recycled_acquire_semaphores < VULKAN_MAX_SWAPCHAIN_IMAGES); + ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores++] = sem; +} + static void vulkan_acquire_clear_fences(gfx_ctx_vulkan_data_t *vk) { unsigned i; @@ -2685,11 +2725,31 @@ static void vulkan_acquire_clear_fences(gfx_ctx_vulkan_data_t *vk) vk->context.swapchain_fences[i] = VK_NULL_HANDLE; } vk->context.swapchain_fences_signalled[i] = false; + + if (vk->context.swapchain_wait_semaphores[i]) + vulkan_recycle_acquire_semaphore(&vk->context, vk->context.swapchain_wait_semaphores[i]); + vk->context.swapchain_wait_semaphores[i] = VK_NULL_HANDLE; } vk->context.current_frame_index = 0; } +static VkSemaphore vulkan_get_wsi_acquire_semaphore(struct vulkan_context *ctx) +{ + if (ctx->num_recycled_acquire_semaphores == 0) + { + VkSemaphoreCreateInfo sem_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; + vkCreateSemaphore(ctx->device, &sem_info, NULL, + &ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores++]); + } + + VkSemaphore sem = + ctx->swapchain_recycled_semaphores[--ctx->num_recycled_acquire_semaphores]; + ctx->swapchain_recycled_semaphores[ctx->num_recycled_acquire_semaphores] = + VK_NULL_HANDLE; + return sem; +} + static void vulkan_acquire_wait_fences(gfx_ctx_vulkan_data_t *vk) { VkFenceCreateInfo fence_info = @@ -2712,6 +2772,10 @@ static void vulkan_acquire_wait_fences(gfx_ctx_vulkan_data_t *vk) else vkCreateFence(vk->context.device, &fence_info, NULL, next_fence); vk->context.swapchain_fences_signalled[index] = false; + + if (vk->context.swapchain_wait_semaphores[index] != VK_NULL_HANDLE) + vulkan_recycle_acquire_semaphore(&vk->context, vk->context.swapchain_wait_semaphores[index]); + vk->context.swapchain_wait_semaphores[index] = VK_NULL_HANDLE; } static void vulkan_create_wait_fences(gfx_ctx_vulkan_data_t *vk) @@ -2734,7 +2798,8 @@ void vulkan_acquire_next_image(gfx_ctx_vulkan_data_t *vk) { unsigned index; VkResult err; - VkFence fence; + VkFence fence = VK_NULL_HANDLE; + VkSemaphore semaphore = VK_NULL_HANDLE; VkFenceCreateInfo fence_info = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; VkSemaphoreCreateInfo sem_info = @@ -2756,6 +2821,7 @@ retry: { /* We still don't have a swapchain, so just fake it ... */ vk->context.current_swapchain_index = 0; + vk->context.current_frame_index = 0; vulkan_acquire_clear_fences(vk); vulkan_acquire_wait_fences(vk); vk->context.invalid_swapchain = true; @@ -2772,14 +2838,17 @@ retry: * MAILBOX would do. */ err = vulkan_emulated_mailbox_acquire_next_image( &vk->mailbox, &vk->context.current_swapchain_index); - fence = VK_NULL_HANDLE; } else { - vkCreateFence(vk->context.device, &fence_info, NULL, &fence); + if (vk->use_wsi_semaphore) + semaphore = vulkan_get_wsi_acquire_semaphore(&vk->context); + else + vkCreateFence(vk->context.device, &fence_info, NULL, &fence); + err = vkAcquireNextImageKHR(vk->context.device, vk->swapchain, UINT64_MAX, - VK_NULL_HANDLE, fence, &vk->context.current_swapchain_index); + semaphore, fence, &vk->context.current_swapchain_index); #ifdef ANDROID /* VK_SUBOPTIMAL_KHR can be returned on Android 10 @@ -2796,9 +2865,27 @@ retry: if (fence != VK_NULL_HANDLE) vkWaitForFences(vk->context.device, 1, &fence, true, UINT64_MAX); vk->context.has_acquired_swapchain = true; + + if (vk->context.swapchain_acquire_semaphore) + { +#ifdef HAVE_THREADS + slock_lock(vk->context.queue_lock); +#endif + RARCH_LOG("[Vulkan]: Destroying stale acquire semaphore.\n"); + vkDeviceWaitIdle(vk->context.device); + vkDestroySemaphore(vk->context.device, vk->context.swapchain_acquire_semaphore, NULL); +#ifdef HAVE_THREADS + slock_unlock(vk->context.queue_lock); +#endif + } + vk->context.swapchain_acquire_semaphore = semaphore; } else + { vk->context.has_acquired_swapchain = false; + if (semaphore) + vulkan_recycle_acquire_semaphore(&vk->context, semaphore); + } #ifdef WSI_HARDENING_TEST trigger_spurious_error_vkresult(&err); @@ -2809,9 +2896,7 @@ retry: if (err == VK_NOT_READY || err == VK_TIMEOUT) { - /* Just pretend we have a swapchain index, round-robin style. */ - vk->context.current_swapchain_index = - (vk->context.current_swapchain_index + 1) % vk->context.num_swapchain_images; + /* Do nothing. */ } else if (err == VK_ERROR_OUT_OF_DATE_KHR || err == VK_SUBOPTIMAL_KHR) { diff --git a/gfx/common/vulkan_common.h b/gfx/common/vulkan_common.h index d44babf57c..475d6afb7a 100644 --- a/gfx/common/vulkan_common.h +++ b/gfx/common/vulkan_common.h @@ -118,6 +118,11 @@ typedef struct vulkan_context VkSemaphore swapchain_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES]; VkFormat swapchain_format; + VkSemaphore swapchain_acquire_semaphore; + unsigned num_recycled_acquire_semaphores; + VkSemaphore swapchain_recycled_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES]; + VkSemaphore swapchain_wait_semaphores[VULKAN_MAX_SWAPCHAIN_IMAGES]; + slock_t *queue_lock; retro_vulkan_destroy_device_t destroy_device; @@ -154,6 +159,10 @@ typedef struct gfx_ctx_vulkan_data bool created_new_swapchain; bool emulate_mailbox; bool emulating_mailbox; + /* If set, prefer a path where we use + * semaphores instead of fences for vkAcquireNextImageKHR. + * Helps workaround certain performance issues on some drivers. */ + bool use_wsi_semaphore; vulkan_context_t context; VkSurfaceKHR vk_surface; VkSwapchainKHR swapchain; @@ -421,7 +430,7 @@ typedef struct vk struct retro_hw_render_interface_vulkan iface; const struct retro_vulkan_image *image; - const VkSemaphore *semaphores; + VkSemaphore *semaphores; VkSemaphore signal_semaphore; VkPipelineStageFlags *wait_dst_stages; VkCommandBuffer *cmd; diff --git a/gfx/drivers/vulkan.c b/gfx/drivers/vulkan.c index 12b90e1e05..db07f7e311 100644 --- a/gfx/drivers/vulkan.c +++ b/gfx/drivers/vulkan.c @@ -909,6 +909,7 @@ static void vulkan_deinit_static_resources(vk_t *vk) vk->staging_pool, NULL); free(vk->hw.cmd); free(vk->hw.wait_dst_stages); + free(vk->hw.semaphores); for (i = 0; i < VULKAN_MAX_SWAPCHAIN_IMAGES; i++) if (vk->readback.staging[i].memory != VK_NULL_HANDLE) @@ -1002,21 +1003,27 @@ static void vulkan_set_image(void *handle, vk->hw.image = image; vk->hw.num_semaphores = num_semaphores; - vk->hw.semaphores = semaphores; if (num_semaphores > 0) { - VkPipelineStageFlags *stage_flags = (VkPipelineStageFlags*) - realloc(vk->hw.wait_dst_stages, - sizeof(VkPipelineStageFlags) * vk->hw.num_semaphores); + /* Allocate one extra in case we need to use WSI acquire semaphores. */ + VkPipelineStageFlags *stage_flags = (VkPipelineStageFlags*)realloc(vk->hw.wait_dst_stages, + sizeof(VkPipelineStageFlags) * (vk->hw.num_semaphores + 1)); + + VkSemaphore *new_semaphores = (VkSemaphore*)realloc(vk->hw.semaphores, + sizeof(VkSemaphore) * (vk->hw.num_semaphores + 1)); /* If this fails, we're screwed anyways. */ - retro_assert(stage_flags); + retro_assert(stage_flags && new_semaphores); vk->hw.wait_dst_stages = stage_flags; + vk->hw.semaphores = new_semaphores; for (i = 0; i < vk->hw.num_semaphores; i++) + { vk->hw.wait_dst_stages[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + vk->hw.semaphores[i] = semaphores[i]; + } vk->hw.valid_semaphore = true; vk->hw.src_queue_family = src_queue_family; @@ -1629,7 +1636,7 @@ static void vulkan_inject_black_frame(vk_t *vk, video_frame_info_t *video_info, vulkan_image_layout_transition(vk, vk->cmd, backbuffer->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); vkCmdClearColorImage(vk->cmd, backbuffer->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, @@ -1645,12 +1652,28 @@ static void vulkan_inject_black_frame(vk_t *vk, video_frame_info_t *video_info, submit_info.commandBufferCount = 1; submit_info.pCommandBuffers = &vk->cmd; - if (vk->context->swapchain_semaphores[swapchain_index] != VK_NULL_HANDLE) + if (vk->context->has_acquired_swapchain && + vk->context->swapchain_semaphores[swapchain_index] != VK_NULL_HANDLE) { submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &vk->context->swapchain_semaphores[swapchain_index]; } + if (vk->context->has_acquired_swapchain && + vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE) + { + static const VkPipelineStageFlags wait_stage = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + assert(!vk->context->swapchain_wait_semaphores[frame_index]); + vk->context->swapchain_wait_semaphores[frame_index] = + vk->context->swapchain_acquire_semaphore; + vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &vk->context->swapchain_wait_semaphores[frame_index]; + submit_info.pWaitDstStageMask = &wait_stage; + } + #ifdef HAVE_THREADS slock_lock(vk->context->queue_lock); #endif @@ -1922,11 +1945,11 @@ static bool vulkan_frame(void *data, const void *frame, clear_color.color.float32[2] = 0.0f; clear_color.color.float32[3] = 0.0f; - /* Prepare backbuffer for rendering. We don't use WSI semaphores here. */ + /* Prepare backbuffer for rendering. */ vulkan_image_layout_transition(vk, vk->cmd, backbuffer->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 0, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); /* Begin render pass and set up viewport */ @@ -2023,7 +2046,7 @@ static bool vulkan_frame(void *data, const void *frame, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); vulkan_readback(vk); @@ -2049,8 +2072,8 @@ static bool vulkan_frame(void *data, const void *frame, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - VK_ACCESS_MEMORY_READ_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + 0, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); } @@ -2097,6 +2120,35 @@ static bool vulkan_frame(void *data, const void *frame, /* Consume the semaphores. */ vk->hw.valid_semaphore = false; + + /* We allocated space for this. */ + if (vk->context->has_acquired_swapchain && + vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE) + { + assert(!vk->context->swapchain_wait_semaphores[frame_index]); + vk->context->swapchain_wait_semaphores[frame_index] = + vk->context->swapchain_acquire_semaphore; + vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE; + + vk->hw.semaphores[submit_info.waitSemaphoreCount] = vk->context->swapchain_wait_semaphores[frame_index]; + vk->hw.wait_dst_stages[submit_info.waitSemaphoreCount] = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + submit_info.waitSemaphoreCount++; + } + } + else if (vk->context->has_acquired_swapchain && + vk->context->swapchain_acquire_semaphore != VK_NULL_HANDLE) + { + static const VkPipelineStageFlags wait_stage = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + assert(!vk->context->swapchain_wait_semaphores[frame_index]); + vk->context->swapchain_wait_semaphores[frame_index] = + vk->context->swapchain_acquire_semaphore; + vk->context->swapchain_acquire_semaphore = VK_NULL_HANDLE; + + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &vk->context->swapchain_wait_semaphores[frame_index]; + submit_info.pWaitDstStageMask = &wait_stage; } submit_info.signalSemaphoreCount = 0;