From 69be82c7860f4303f4ab03a5be5488c25b9f2bff Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 21 Oct 2016 19:44:11 -0500 Subject: [PATCH] Attempt at synchronizing swap image writes between the main window and graphics backend. Disabled for now due to device timeouts. --- src/xenia/gpu/command_processor.h | 4 +- .../gpu/vulkan/vulkan_command_processor.cc | 197 ++++++++++-------- .../gpu/vulkan/vulkan_command_processor.h | 18 +- .../gpu/vulkan/vulkan_graphics_system.cc | 8 +- src/xenia/ui/vulkan/vulkan_swap_chain.cc | 26 ++- src/xenia/ui/vulkan/vulkan_swap_chain.h | 4 + 6 files changed, 157 insertions(+), 100 deletions(-) diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index c2784480b..a418dd683 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -47,6 +47,8 @@ struct SwapState { uintptr_t front_buffer_texture = 0; // Current back buffer, being updated by the CP. uintptr_t back_buffer_texture = 0; + // Backend data + void* backend_data = nullptr; // Whether the back buffer is dirty and a swap is pending. 
bool pending = false; }; @@ -115,7 +117,7 @@ class CommandProcessor { virtual bool SetupContext() = 0; virtual void ShutdownContext() = 0; - void WriteRegister(uint32_t index, uint32_t value); + virtual void WriteRegister(uint32_t index, uint32_t value); virtual void MakeCoherent(); virtual void PrepareForWait(); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 47de85c15..4fd7ff4ee 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -49,14 +49,7 @@ void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) { void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); - - auto status = vkQueueWaitIdle(queue_); - CheckResult(status, "vkQueueWaitIdle"); - - buffer_cache_->ClearCache(); - pipeline_cache_->ClearCache(); - render_cache_->ClearCache(); - texture_cache_->ClearCache(); + cache_clear_requested_ = true; } bool VulkanCommandProcessor::SetupContext() { @@ -89,15 +82,29 @@ bool VulkanCommandProcessor::SetupContext() { texture_cache_->texture_descriptor_set_layout()); render_cache_ = std::make_unique(register_file_, device_); + VkSemaphoreCreateInfo info; + std::memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + VkResult result = vkCreateSemaphore( + *device_, &info, nullptr, + reinterpret_cast(&swap_state_.backend_data)); + if (result != VK_SUCCESS) { + return false; + } + return true; } void VulkanCommandProcessor::ShutdownContext() { // TODO(benvanik): wait until idle. + vkDestroySemaphore(*device_, + reinterpret_cast(swap_state_.backend_data), + nullptr); + if (swap_state_.front_buffer_texture) { - // Free swap chain images. - DestroySwapImages(); + // Free swap chain image. 
+ DestroySwapImage(); } buffer_cache_.reset(); @@ -123,9 +130,15 @@ void VulkanCommandProcessor::MakeCoherent() { CommandProcessor::MakeCoherent(); + // Make region coherent if (status_host & 0x80000000ul) { // TODO(benvanik): less-fine-grained clearing. buffer_cache_->InvalidateCache(); + + if ((status_host & 0x01000000) != 0 && (status_host & 0x02000000) == 0) { + coher_base_vc_ = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; + coher_size_vc_ = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + } } } @@ -149,8 +162,33 @@ void VulkanCommandProcessor::ReturnFromWait() { CommandProcessor::ReturnFromWait(); } -void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer, - VkExtent2D extents) { +void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + CommandProcessor::WriteRegister(index, value); + + if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && + index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { + uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_000_X; + offset /= 4 * 4; + offset ^= 0x3F; + + dirty_float_constants_ |= (1ull << offset); + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && + index <= XE_GPU_REG_SHADER_CONSTANT_BOOL_224_255) { + uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031; + offset ^= 0x7; + + dirty_bool_constants_ |= (1 << offset); + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_LOOP_00 && + index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { + uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_LOOP_00; + offset ^= 0x1F; + + dirty_loop_constants_ |= (1 << offset); + } +} + +void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, + VkExtent2D extents) { VkImageCreateInfo image_info; std::memset(&image_info, 0, sizeof(VkImageCreateInfo)); image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -168,34 +206,23 @@ void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer, image_info.pQueueFamilyIndices = nullptr; image_info.initialLayout = 
VK_IMAGE_LAYOUT_UNDEFINED; - VkImage image_fb, image_bb; + VkImage image_fb; auto status = vkCreateImage(*device_, &image_info, nullptr, &image_fb); CheckResult(status, "vkCreateImage"); - status = vkCreateImage(*device_, &image_info, nullptr, &image_bb); - CheckResult(status, "vkCreateImage"); - - // Bind memory to images. + // Bind memory to image. VkMemoryRequirements mem_requirements; vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements); - fb_memory = device_->AllocateMemory(mem_requirements, 0); - assert_not_null(fb_memory); + fb_memory_ = device_->AllocateMemory(mem_requirements, 0); + assert_not_null(fb_memory_); - status = vkBindImageMemory(*device_, image_fb, fb_memory, 0); - CheckResult(status, "vkBindImageMemory"); - - vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements); - bb_memory = device_->AllocateMemory(mem_requirements, 0); - assert_not_null(bb_memory); - - status = vkBindImageMemory(*device_, image_bb, bb_memory, 0); + status = vkBindImageMemory(*device_, image_fb, fb_memory_, 0); CheckResult(status, "vkBindImageMemory"); std::lock_guard lock(swap_state_.mutex); swap_state_.front_buffer_texture = reinterpret_cast(image_fb); - swap_state_.back_buffer_texture = reinterpret_cast(image_bb); - // Transition both images to general layout. + // Transition image to general layout. 
VkImageMemoryBarrier barrier; std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -208,32 +235,20 @@ void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer, barrier.image = image_fb; barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &barrier); - - barrier.image = image_bb; - vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); } -void VulkanCommandProcessor::DestroySwapImages() { +void VulkanCommandProcessor::DestroySwapImage() { std::lock_guard lock(swap_state_.mutex); vkDestroyImage(*device_, reinterpret_cast(swap_state_.front_buffer_texture), nullptr); - vkDestroyImage(*device_, - reinterpret_cast(swap_state_.back_buffer_texture), - nullptr); - vkFreeMemory(*device_, fb_memory, nullptr); - vkFreeMemory(*device_, bb_memory, nullptr); + vkFreeMemory(*device_, fb_memory_, nullptr); swap_state_.front_buffer_texture = 0; - swap_state_.back_buffer_texture = 0; - fb_memory = nullptr; - bb_memory = nullptr; + fb_memory_ = nullptr; } void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, @@ -267,10 +282,27 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, frontbuffer_ptr = last_copy_base_; } - if (!swap_state_.back_buffer_texture) { - CreateSwapImages(copy_commands, {frontbuffer_width, frontbuffer_height}); + if (!swap_state_.front_buffer_texture) { + CreateSwapImage(copy_commands, {frontbuffer_width, frontbuffer_height}); + + // Signal the swap usage semaphore by default. 
+ auto swap_sem = reinterpret_cast(swap_state_.backend_data); + + VkSubmitInfo info; + std::memset(&info, 0, sizeof(info)); + info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + info.signalSemaphoreCount = 1; + info.pSignalSemaphores = &swap_sem; + if (queue_mutex_) { + std::lock_guard lock(*queue_mutex_); + status = vkQueueSubmit(queue_, 1, &info, nullptr); + CheckResult(status, "vkQueueSubmit"); + } else { + status = vkQueueSubmit(queue_, 1, &info, nullptr); + CheckResult(status, "vkQueueSubmit"); + } } - auto swap_bb = reinterpret_cast(swap_state_.back_buffer_texture); + auto swap_fb = reinterpret_cast(swap_state_.front_buffer_texture); // Issue the commands to copy the game's frontbuffer to our backbuffer. auto texture = texture_cache_->LookupAddress( @@ -310,7 +342,7 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, int32_t(frontbuffer_height), 1}; vkCmdBlitImage(copy_commands, texture->image, texture->image_layout, - swap_bb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, + swap_fb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, VK_FILTER_LINEAR); std::lock_guard lock(swap_state_.mutex); @@ -351,13 +383,28 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, queue_mutex_->lock(); } + // TODO: We really don't need to wrap all the commands with this semaphore, + // only the copy commands. 
+ auto swap_sem = reinterpret_cast(swap_state_.backend_data); + VkSubmitInfo submit_info; std::memset(&submit_info, 0, sizeof(VkSubmitInfo)); submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.commandBufferCount = uint32_t(submit_buffers.size()); submit_info.pCommandBuffers = submit_buffers.data(); - status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); - CheckResult(status, "vkQueueSubmit"); + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &swap_sem; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &swap_sem; + + if (queue_mutex_) { + std::lock_guard lock(*queue_mutex_); + status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); + CheckResult(status, "vkQueueSubmit"); + } else { + status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); + CheckResult(status, "vkQueueSubmit"); + } if (device_->is_renderdoc_attached() && capturing_) { device_->EndRenderDocFrameCapture(); @@ -370,6 +417,17 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, command_buffer_pool_->EndBatch(current_batch_fence_); + if (cache_clear_requested_) { + cache_clear_requested_ = false; + VkFence fences[] = {*current_batch_fence_}; + vkWaitForFences(*device_, 1, fences, VK_TRUE, -1); + + buffer_cache_->ClearCache(); + pipeline_cache_->ClearCache(); + render_cache_->ClearCache(); + texture_cache_->ClearCache(); + } + // Scavenging. 
{ #if FINE_GRAINED_DRAW_SCOPES @@ -492,10 +550,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_render_state_ = render_cache_->BeginRenderPass( command_buffer, vertex_shader, pixel_shader); if (!current_render_state_) { - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; return false; } } @@ -512,46 +566,22 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } else if (pipeline_status == PipelineCache::UpdateStatus::kError) { - render_cache_->EndRenderPass(); - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; - current_render_state_ = nullptr; return false; } pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer); // Pass registers to the shaders. if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { - render_cache_->EndRenderPass(); - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; - current_render_state_ = nullptr; return false; } // Upload and bind index buffer data (if we have any). if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { - render_cache_->EndRenderPass(); - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; - current_render_state_ = nullptr; return false; } // Upload and bind all vertex buffer data. 
if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { - render_cache_->EndRenderPass(); - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; - current_render_state_ = nullptr; return false; } @@ -560,12 +590,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Setup buffer may be flushed to GPU if the texture cache needs it. if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader, pixel_shader)) { - render_cache_->EndRenderPass(); - command_buffer_pool_->CancelBatch(); - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - current_batch_fence_ = nullptr; - current_render_state_ = nullptr; return false; } @@ -719,11 +743,12 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( // THIS CAN BE MASSIVELY INCORRECT (too large). size_t valid_range = size_t(fetch->size * 4); - trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range); + uint32_t physical_address = fetch->address << 2; + trace_writer_.WriteMemoryRead(physical_address, valid_range); // Upload (or get a cached copy of) the buffer. 
const void* source_ptr = - memory_->TranslatePhysical(fetch->address << 2); + memory_->TranslatePhysical(physical_address); size_t source_length = valid_range; auto buffer_ref = buffer_cache_->UploadVertexBuffer( source_ptr, source_length, static_cast(fetch->endian), diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index f58e2319b..3e1311647 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -62,8 +62,10 @@ class VulkanCommandProcessor : public CommandProcessor { void PrepareForWait() override; void ReturnFromWait() override; - void CreateSwapImages(VkCommandBuffer setup_buffer, VkExtent2D extents); - void DestroySwapImages(); + void WriteRegister(uint32_t index, uint32_t value) override; + + void CreateSwapImage(VkCommandBuffer setup_buffer, VkExtent2D extents); + void DestroySwapImage(); void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -90,8 +92,14 @@ class VulkanCommandProcessor : public CommandProcessor { xe::ui::vulkan::VulkanDevice* device_ = nullptr; // front buffer / back buffer memory - VkDeviceMemory fb_memory = nullptr; - VkDeviceMemory bb_memory = nullptr; + VkDeviceMemory fb_memory_ = nullptr; + + uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4 + uint8_t dirty_bool_constants_ = 0; + uint32_t dirty_loop_constants_ = 0; + + uint32_t coher_base_vc_ = 0; + uint32_t coher_size_vc_ = 0; // TODO(benvanik): abstract behind context? // Queue used to submit work. This may be a dedicated queue for the command @@ -103,8 +111,10 @@ class VulkanCommandProcessor : public CommandProcessor { // Last copy base address, for debugging only. 
uint32_t last_copy_base_ = 0; + bool capturing_ = false; bool trace_requested_ = false; + bool cache_clear_requested_ = false; std::unique_ptr buffer_cache_; std::unique_ptr pipeline_cache_; diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 08c6120d7..7286adc8c 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -64,8 +64,6 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { std::lock_guard lock(swap_state.mutex); if (swap_state.pending) { swap_state.pending = false; - std::swap(swap_state.front_buffer_texture, - swap_state.back_buffer_texture); } } @@ -74,11 +72,17 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } + auto semaphore = reinterpret_cast(swap_state.backend_data); auto swap_chain = display_context_->swap_chain(); auto copy_cmd_buffer = swap_chain->copy_cmd_buffer(); auto front_buffer = reinterpret_cast(swap_state.front_buffer_texture); + // Wait on and signal the swap semaphore. + // TODO(DrChat): Interacting with the window causes the device to be lost in + // some games. + // swap_chain->WaitAndSignalSemaphore(semaphore); + VkImageMemoryBarrier barrier; std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index f4273bb72..29a5a1308 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -338,6 +338,10 @@ bool VulkanSwapChain::Reinitialize() { return Initialize(surface); } +void VulkanSwapChain::WaitAndSignalSemaphore(VkSemaphore sem) { + wait_and_signal_semaphores_.push_back(sem); +} + void VulkanSwapChain::Shutdown() { // TODO(benvanik): properly wait for a clean state. 
for (auto& buffer : buffers_) { @@ -372,6 +376,8 @@ void VulkanSwapChain::Shutdown() { } bool VulkanSwapChain::Begin() { + wait_and_signal_semaphores_.clear(); + // Get the index of the next available swapchain image. auto err = vkAcquireNextImageKHR(*device_, handle, 0, image_available_semaphore_, @@ -521,28 +527,34 @@ bool VulkanSwapChain::End() { VkPipelineStageFlags wait_dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + std::vector<VkSemaphore> semaphores; + for (size_t i = 0; i < wait_and_signal_semaphores_.size(); i++) { + semaphores.push_back(wait_and_signal_semaphores_[i]); + } + semaphores.push_back(image_usage_semaphore_); + // Submit copy commands. + // Wait on the image usage semaphore (signaled when an image is available) VkSubmitInfo render_submit_info; render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; render_submit_info.pNext = nullptr; - render_submit_info.waitSemaphoreCount = 1; - render_submit_info.pWaitSemaphores = &image_usage_semaphore_; + render_submit_info.waitSemaphoreCount = uint32_t(semaphores.size()); + render_submit_info.pWaitSemaphores = semaphores.data(); render_submit_info.pWaitDstStageMask = &wait_dst_stage; render_submit_info.commandBufferCount = 1; render_submit_info.pCommandBuffers = &copy_cmd_buffer_; - render_submit_info.signalSemaphoreCount = 1; - render_submit_info.pSignalSemaphores = &image_usage_semaphore_; + render_submit_info.signalSemaphoreCount = uint32_t(semaphores.size()); + render_submit_info.pSignalSemaphores = semaphores.data(); { std::lock_guard queue_lock(device_->primary_queue_mutex()); err = vkQueueSubmit(device_->primary_queue(), 1, &render_submit_info, nullptr); } - // Submit render commands. + // Submit render commands, and don't signal the usage semaphore. 
render_submit_info.commandBufferCount = 1; render_submit_info.pCommandBuffers = &render_cmd_buffer_; - render_submit_info.signalSemaphoreCount = 0; - render_submit_info.pSignalSemaphores = nullptr; + render_submit_info.signalSemaphoreCount = uint32_t(semaphores.size()) - 1; { std::lock_guard queue_lock(device_->primary_queue_mutex()); err = vkQueueSubmit(device_->primary_queue(), 1, &render_submit_info, diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h index fe2c0b038..dda96d51c 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.h +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.h @@ -53,6 +53,9 @@ class VulkanSwapChain { // torn down and recreated with the new surface properties (size/etc). bool Reinitialize(); + // Waits on and signals a semaphore in this operation. + void WaitAndSignalSemaphore(VkSemaphore sem); + // Begins the swap operation, preparing state for rendering. bool Begin(); // Ends the swap operation, finalizing rendering and presenting the results. @@ -86,6 +89,7 @@ class VulkanSwapChain { VkSemaphore image_usage_semaphore_ = nullptr; uint32_t current_buffer_index_ = 0; std::vector buffers_; + std::vector<VkSemaphore> wait_and_signal_semaphores_; }; } // namespace vulkan