vulkan: Only submit command buffer at flip and avoid extra sync.

This commit is contained in:
Vincent Lejeune 2016-03-23 23:22:33 +01:00
parent 7a988c73a6
commit aa3c97f918
3 changed files with 73 additions and 137 deletions

View File

@ -381,15 +381,7 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
//create command buffer... //create command buffer...
m_command_buffer_pool.create((*m_device)); m_command_buffer_pool.create((*m_device));
m_command_buffer.create(m_command_buffer_pool); m_command_buffer.create(m_command_buffer_pool);
open_command_buffer();
VkCommandBufferInheritanceInfo inheritance_info = {};
inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
VkCommandBufferBeginInfo begin_infos = {};
begin_infos.pInheritanceInfo = &inheritance_info;
begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos));
for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i) for (u32 i = 0; i < m_swap_chain->get_swap_image_count(); ++i)
{ {
@ -406,9 +398,6 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
} }
CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
execute_command_buffer(false);
#define RING_BUFFER_SIZE 16 * 1024 * 1024 #define RING_BUFFER_SIZE 16 * 1024 * 1024
m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE); m_uniform_buffer_ring_info.init(RING_BUFFER_SIZE);
@ -433,16 +422,21 @@ VKGSRender::VKGSRender() : GSRender(frame_type::Vulkan)
null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0); null_buffer = std::make_unique<vk::buffer>(*m_device, 32, m_memory_type_mapping.host_visible_coherent, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0);
null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32); null_buffer_view = std::make_unique<vk::buffer_view>(*m_device, null_buffer->value, VK_FORMAT_R32_SFLOAT, 0, 32);
VkFenceCreateInfo fence_info = {};
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence));
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
} }
VKGSRender::~VKGSRender() VKGSRender::~VKGSRender()
{ {
if (m_submit_fence) CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue()));
{
vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, 1000000L);
vkDestroyFence((*m_device), m_submit_fence, nullptr);
m_submit_fence = nullptr;
}
if (m_present_semaphore) if (m_present_semaphore)
{ {
@ -500,8 +494,6 @@ void VKGSRender::begin()
//TODO: Fence sync, ring-buffers, etc //TODO: Fence sync, ring-buffers, etc
//CHECK_RESULT(vkDeviceWaitIdle((*m_device))); //CHECK_RESULT(vkDeviceWaitIdle((*m_device)));
if (!recording)
begin_command_buffer_recording();
VkDescriptorSetAllocateInfo alloc_info = {}; VkDescriptorSetAllocateInfo alloc_info = {};
alloc_info.descriptorPool = descriptor_pool; alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorSetCount = 1; alloc_info.descriptorSetCount = 1;
@ -616,9 +608,6 @@ void VKGSRender::end()
vkCmdEndRenderPass(m_command_buffer); vkCmdEndRenderPass(m_command_buffer);
end_command_buffer_recording();
execute_command_buffer(false);
rsx::thread::end(); rsx::thread::end();
} }
@ -680,11 +669,6 @@ void VKGSRender::clear_surface(u32 mask)
if (m_current_present_image== 0xFFFF) return; if (m_current_present_image== 0xFFFF) return;
bool was_recording = recording;
if (!was_recording)
begin_command_buffer_recording();
init_buffers(); init_buffers();
float depth_clear = 1.f; float depth_clear = 1.f;
@ -756,13 +740,16 @@ void VKGSRender::clear_surface(u32 mask)
change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
} }
if (!was_recording)
{
end_command_buffer_recording();
execute_command_buffer(false);
} }
// Side-by-side diff rendering: the text before `void` on the next line and the first `}` on the
// last line are the tail of the other diff column's clear_surface(); they are not part of this function.
//
// sync_at_semaphore_release: flushes the work recorded so far and blocks until the GPU has
// finished it, then starts a fresh recording. Called from do_method() for the
// NV4097_TEXTURE_READ_SEMAPHORE_RELEASE and NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE commands.
recording = was_recording; void VKGSRender::sync_at_semaphore_release()
{
// Submit with no wait semaphores; m_submit_fence is passed as the fence for the submission.
close_and_submit_command_buffer({}, m_submit_fence);
// Timeout of ~0ULL: wait without a practical time limit.
CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
// The fence is reused for the next submission, so it is reset rather than destroyed.
CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
// Reset the whole pool, then begin recording into m_command_buffer again.
CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
open_command_buffer();
} }
bool VKGSRender::do_method(u32 cmd, u32 arg) bool VKGSRender::do_method(u32 cmd, u32 arg)
@ -772,6 +759,10 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
case NV4097_CLEAR_SURFACE: case NV4097_CLEAR_SURFACE:
clear_surface(arg); clear_surface(arg);
return true; return true;
case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
sync_at_semaphore_release();
return false; //call rsx::thread method implementation
default: default:
return false; return false;
} }
@ -978,20 +969,6 @@ static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
void VKGSRender::init_buffers(bool skip_reading) void VKGSRender::init_buffers(bool skip_reading)
{ {
if (dirty_frame)
{
//Prepare surface for new frame
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &m_present_semaphore);
VkFence nullFence = VK_NULL_HANDLE;
CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, nullFence, &m_current_present_image));
dirty_frame = false;
}
prepare_rtts(); prepare_rtts();
if (!skip_reading) if (!skip_reading)
@ -1010,7 +987,25 @@ void VKGSRender::write_buffers()
{ {
} }
// Side-by-side diff rendering: the next line fuses the old signature
// (begin_command_buffer_recording, left column) with the new one (right column).
//
// close_and_submit_command_buffer: ends recording of m_command_buffer and submits it to the
// swap chain's present queue.
//   semaphores - semaphores the submission waits on (may be empty).
//   fence      - fence handed to vkQueueSubmit for this submission.
// Does not wait for completion itself; the visible callers wait on the fence afterwards.
void VKGSRender::begin_command_buffer_recording() void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence)
{
CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
VkCommandBuffer cmd = m_command_buffer;
VkSubmitInfo infos = {};
infos.commandBufferCount = 1;
infos.pCommandBuffers = &cmd;
// NOTE(review): this points at a single stage mask, while waitSemaphoreCount below can be any
// size. Vulkan expects one mask per wait semaphore, so this is only valid for 0 or 1 semaphores
// (which is all the callers visible here pass) - confirm before passing more.
infos.pWaitDstStageMask = &pipe_stage_flags;
infos.pWaitSemaphores = semaphores.data();
// size_t is implicitly narrowed to the 32-bit count field here.
infos.waitSemaphoreCount = semaphores.size();
infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, fence));
}
// open_command_buffer: begins recording into m_command_buffer.
//
// Side-by-side diff rendering: lines that contain the same statement twice are unchanged context
// shown in both columns; lines that appear once belong to only one side of the diff. The
// rendering does not mark which side - presumably the fence wait/destroy block and
// `recording = true` are the pre-commit begin_command_buffer_recording() body; verify against
// the actual commit.
void VKGSRender::open_command_buffer()
{ {
VkCommandBufferInheritanceInfo inheritance_info = {}; VkCommandBufferInheritanceInfo inheritance_info = {};
inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
@ -1018,25 +1013,10 @@ void VKGSRender::begin_command_buffer_recording()
VkCommandBufferBeginInfo begin_infos = {}; VkCommandBufferBeginInfo begin_infos = {};
begin_infos.pInheritanceInfo = &inheritance_info; begin_infos.pInheritanceInfo = &inheritance_info;
begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
// Single-column line: marks the recording as submitted once before being reset/re-recorded.
begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
// Single-column block: waits for the outstanding submission, destroys the fence and resets the
// command buffer before recording again.
if (m_submit_fence)
{
vkWaitForFences(*m_device, 1, &m_submit_fence, VK_TRUE, ~0ULL);
vkDestroyFence(*m_device, m_submit_fence, nullptr);
m_submit_fence = nullptr;
CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0));
}
CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos)); CHECK_RESULT(vkBeginCommandBuffer(m_command_buffer, &begin_infos));
recording = true;
} }
// end_command_buffer_recording: clears the `recording` flag and ends recording of
// m_command_buffer. Appears in one diff column only (presumably the pre-commit side; the
// header hunk pairs this name with a replacement declaration).
void VKGSRender::end_command_buffer_recording()
{
// The flag is cleared before the Vulkan call, so it is already false if CHECK_RESULT reports a failure.
recording = false;
CHECK_RESULT(vkEndCommandBuffer(m_command_buffer));
}
void VKGSRender::prepare_rtts() void VKGSRender::prepare_rtts()
{ {
@ -1110,31 +1090,6 @@ void VKGSRender::prepare_rtts()
m_framebuffer_to_clean.push_back(std::make_unique<vk::framebuffer>(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); m_framebuffer_to_clean.push_back(std::make_unique<vk::framebuffer>(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
} }
// execute_command_buffer: creates a new fence, submits m_command_buffer to the swap chain's
// present queue with that fence, then blocks until the queue is idle.
// Appears in one diff column only (presumably the pre-commit side).
//   wait - not read anywhere in the body; the function waits for queue idle regardless.
// Throws if called while `recording` is still set, or if a previous m_submit_fence has not
// been cleared yet.
void VKGSRender::execute_command_buffer(bool wait)
{
if (recording)
throw EXCEPTION("execute_command_buffer called before end_command_buffer_recording()!");
// A non-null fence means an earlier submission was never waited on and destroyed.
if (m_submit_fence)
throw EXCEPTION("Synchronization deadlock!");
VkFenceCreateInfo fence_info = {};
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
CHECK_RESULT(vkCreateFence(*m_device, &fence_info, nullptr, &m_submit_fence));
VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
VkCommandBuffer cmd = m_command_buffer;
VkSubmitInfo infos = {};
infos.commandBufferCount = 1;
infos.pCommandBuffers = &cmd;
// No wait semaphores are set (count stays 0 from the zero-initialisation above).
infos.pWaitDstStageMask = &pipe_stage_flags;
infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, m_submit_fence));
// Full queue drain after every submission, independent of the fence created above.
CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue()));
}
void VKGSRender::flip(int buffer) void VKGSRender::flip(int buffer)
{ {
@ -1178,28 +1133,21 @@ void VKGSRender::flip(int buffer)
VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain); VkSwapchainKHR swap_chain = (VkSwapchainKHR)(*m_swap_chain);
uint32_t next_image_temp = 0; uint32_t next_image_temp = 0;
VkPresentInfoKHR present = {}; //Prepare surface for new frame
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; CHECK_RESULT(vkAcquireNextImageKHR((*m_device), (*m_swap_chain), 0, m_present_semaphore, VK_NULL_HANDLE, &m_current_present_image));
present.pNext = nullptr;
present.swapchainCount = 1;
present.pSwapchains = &swap_chain;
present.pImageIndices = &m_current_present_image;
present.pWaitSemaphores = &m_present_semaphore;
present.waitSemaphoreCount = 1;
begin_command_buffer_recording();
if (m_present_semaphore)
{
//Blit contents to screen.. //Blit contents to screen..
VkImage image_to_flip = nullptr; VkImage image_to_flip = nullptr;
if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr) if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr)
image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0])->value; image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0])->value;
else else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr)
image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1])->value; image_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1])->value;
VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image); VkImage target_image = m_swap_chain->get_swap_chain_image(m_current_present_image);
if (image_to_flip)
{
vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, vk::copy_scaled_image(m_command_buffer, image_to_flip, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT); buffer_width, buffer_height, aspect_ratio.width, aspect_ratio.height, 1, VK_IMAGE_ASPECT_COLOR_BIT);
} }
@ -1212,37 +1160,27 @@ void VKGSRender::flip(int buffer)
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT);
VkClearColorValue clear_black = { 0 }; VkClearColorValue clear_black = { 0 };
vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(next_image_temp), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, &clear_black, 1, &range); vkCmdClearColorImage(m_command_buffer, m_swap_chain->get_swap_chain_image(next_image_temp), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, &clear_black, 1, &range);
present.pImageIndices = &next_image_temp;
present.waitSemaphoreCount = 0;
} }
end_command_buffer_recording(); close_and_submit_command_buffer({ m_present_semaphore }, m_submit_fence);
execute_command_buffer(false);
//Check if anything is waiting in queue and wait for it if possible..
if (m_submit_fence)
{
CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL)); CHECK_RESULT(vkWaitForFences((*m_device), 1, &m_submit_fence, VK_TRUE, ~0ULL));
vkDestroyFence((*m_device), m_submit_fence, nullptr); VkPresentInfoKHR present = {};
m_submit_fence = nullptr; present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present.pNext = nullptr;
CHECK_RESULT(vkResetCommandBuffer(m_command_buffer, 0)); present.swapchainCount = 1;
} present.pSwapchains = &swap_chain;
present.pImageIndices = &m_current_present_image;
CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present)); CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
CHECK_RESULT(vkQueueWaitIdle(m_swap_chain->get_present_queue()));
CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
open_command_buffer();
m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one(); m_uniform_buffer_ring_info.m_get_pos = m_uniform_buffer_ring_info.get_current_put_pos_minus_one();
m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one(); m_index_buffer_ring_info.m_get_pos = m_index_buffer_ring_info.get_current_put_pos_minus_one();
m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one(); m_attrib_ring_info.m_get_pos = m_attrib_ring_info.get_current_put_pos_minus_one();
m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one(); m_texture_upload_buffer_ring_info.m_get_pos = m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one();
if (m_present_semaphore)
{
vkDestroySemaphore((*m_device), m_present_semaphore, nullptr);
m_present_semaphore = nullptr;
}
//Feed back damaged resources to the main texture cache for management... //Feed back damaged resources to the main texture cache for management...
// m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources); // m_texture_cache.merge_dirty_textures(m_rtts.invalidated_resources);

View File

@ -61,7 +61,6 @@ private:
vk::command_pool m_command_buffer_pool; vk::command_pool m_command_buffer_pool;
vk::command_buffer m_command_buffer; vk::command_buffer m_command_buffer;
bool recording = false;
bool dirty_frame = true; bool dirty_frame = true;
@ -85,10 +84,9 @@ public:
private: private:
void clear_surface(u32 mask); void clear_surface(u32 mask);
void execute_command_buffer(bool wait); void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence);
void begin_command_buffer_recording(); void open_command_buffer();
void end_command_buffer_recording(); void sync_at_semaphore_release();
void prepare_rtts(); void prepare_rtts();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType> upload_vertex_data(); std::tuple<VkPrimitiveTopology, bool, u32, VkDeviceSize, VkIndexType> upload_vertex_data();

View File

@ -899,7 +899,7 @@ namespace vk
{ {
owner = &dev; owner = &dev;
VkCommandPoolCreateInfo infos = {}; VkCommandPoolCreateInfo infos = {};
infos.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; infos.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; infos.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool)); CHECK_RESULT(vkCreateCommandPool(dev, &infos, nullptr, &pool));