Merge pull request #7905 from stenzek/vulkan-cleanup

Vulkan: Simplify command buffer fence tracking

commit 377615b06f
@@ -99,7 +99,7 @@ void BoundingBox::Flush()
   StagingBuffer::BufferMemoryBarrier(
       g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
-      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
 }
 
 // We're now up-to-date.
@@ -223,7 +223,7 @@ void BoundingBox::Readback()
   StagingBuffer::BufferMemoryBarrier(
       g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer,
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0,
-      BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
+      BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
   m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                         VK_ACCESS_TRANSFER_WRITE_BIT,
                                         VK_PIPELINE_STAGE_TRANSFER_BIT);
@@ -237,7 +237,7 @@ void BoundingBox::Readback()
   StagingBuffer::BufferMemoryBarrier(
       g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT,
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE,
-      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
   m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                    VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
 
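
The three hunks above (apparently BoundingBox.cpp, going by the function names) only retarget the pipeline stages of an existing barrier: the bounding-box buffer is written and read by fragment shaders, so the shader-side stage mask should be VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT rather than VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT. As a hedged illustration of why the stage choice matters — this is a sketch, not the commit's actual StagingBuffer::BufferMemoryBarrier implementation — a helper with this call shape would expand to roughly:

```cpp
// Illustrative sketch only: a plausible body for a helper with the call shape
// used above. The real implementation in the tree may differ.
void BufferMemoryBarrier(VkCommandBuffer cmd, VkBuffer buffer, VkAccessFlags src_access,
                         VkAccessFlags dst_access, VkDeviceSize offset, VkDeviceSize size,
                         VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage)
{
  VkBufferMemoryBarrier barrier = {};
  barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
  barrier.srcAccessMask = src_access;  // writes to make available (e.g. transfer writes)
  barrier.dstAccessMask = dst_access;  // accesses to make visible (e.g. shader read/write)
  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.buffer = buffer;
  barrier.offset = offset;
  barrier.size = size;

  // dst_stage must name the stage that actually performs the dst_access accesses.
  // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT performs no memory accesses, so pairing it
  // with VK_ACCESS_SHADER_READ_BIT makes nothing visible to the shader — hence
  // the switch to VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT in the hunks above.
  vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}
```
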
@@ -54,7 +54,6 @@ bool CommandBufferManager::CreateCommandBuffers()
   {
     resources.init_command_buffer_used = false;
     resources.semaphore_used = false;
-    resources.needs_fence_wait = false;
 
     VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0,
                                          g_vulkan_context->GetGraphicsQueueFamilyIndex()};
@@ -211,43 +210,61 @@ void CommandBufferManager::WaitForWorkerThreadIdle()
   m_submit_semaphore.Post();
 }
 
-void CommandBufferManager::WaitForGPUIdle()
+void CommandBufferManager::WaitForFenceCounter(u64 fence_counter)
 {
-  WaitForWorkerThreadIdle();
-  vkDeviceWaitIdle(g_vulkan_context->GetDevice());
-}
-
-void CommandBufferManager::WaitForFence(VkFence fence)
-{
-  // Find the command buffer that this fence corresponds to.
-  u32 command_buffer_index = 0;
-  for (; command_buffer_index < static_cast<u32>(m_frame_resources.size()); command_buffer_index++)
-  {
-    if (m_frame_resources[command_buffer_index].fence == fence)
-      break;
-  }
-  ASSERT(command_buffer_index < m_frame_resources.size());
-
-  // Has this command buffer already been waited for?
-  if (!m_frame_resources[command_buffer_index].needs_fence_wait)
+  if (m_completed_fence_counter >= fence_counter)
     return;
 
+  // Find the first command buffer which covers this counter value.
+  u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  while (index != m_current_frame)
+  {
+    if (m_frame_resources[index].fence_counter >= fence_counter)
+      break;
+
+    index = (index + 1) % NUM_COMMAND_BUFFERS;
+  }
+
+  ASSERT(index != m_current_frame);
+  WaitForCommandBufferCompletion(index);
+}
+
+void CommandBufferManager::WaitForCommandBufferCompletion(u32 index)
+{
   // Ensure this command buffer has been submitted.
   WaitForWorkerThreadIdle();
 
   // Wait for this command buffer to be completed.
-  VkResult res =
-      vkWaitForFences(g_vulkan_context->GetDevice(), 1,
-                      &m_frame_resources[command_buffer_index].fence, VK_TRUE, UINT64_MAX);
+  VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, &m_frame_resources[index].fence,
+                                 VK_TRUE, UINT64_MAX);
   if (res != VK_SUCCESS)
     LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
 
-  // Immediately fire callbacks and cleanups, since the commands has been completed.
-  m_frame_resources[command_buffer_index].needs_fence_wait = false;
-  OnCommandBufferExecuted(command_buffer_index);
+  // Clean up any resources for command buffers between the last known completed buffer and this
+  // now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
+  const u64 now_completed_counter = m_frame_resources[index].fence_counter;
+  u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  while (cleanup_index != m_current_frame)
+  {
+    FrameResources& resources = m_frame_resources[cleanup_index];
+    if (resources.fence_counter > now_completed_counter)
+      break;
+
+    if (resources.fence_counter > m_completed_fence_counter)
+    {
+      for (auto& it : resources.cleanup_resources)
+        it();
+      resources.cleanup_resources.clear();
+    }
+
+    cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
+  }
+
+  m_completed_fence_counter = now_completed_counter;
 }
 
 void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
+                                               bool wait_for_completion,
                                                VkSwapchainKHR present_swap_chain,
                                                uint32_t present_image_index)
 {
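
The new WaitForFenceCounter()/WaitForCommandBufferCompletion() pair replaces per-VkFence lookups and callback firing with a single monotonically increasing u64 watermark. A minimal sketch of the same bookkeeping, stripped of Vulkan (all names here are illustrative, not from the diff; the real code blocks on a VkFence where noted):

```cpp
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Each submitted batch is stamped with an increasing counter; a single
// "completed" watermark retires every batch at or below it.
struct Batch
{
  uint64_t fence_counter = 0;  // 0 = never submitted
  std::vector<std::function<void()>> cleanups;
};

class BatchTracker
{
public:
  static constexpr size_t NUM_BATCHES = 2;

  uint64_t CurrentCounter() const { return m_batches[m_current].fence_counter; }
  uint64_t CompletedCounter() const { return m_completed_counter; }

  // Analogue of BeginCommandBuffer(): reuse the next slot, waiting if the
  // watermark has not caught up to it yet, then stamp it with a fresh counter.
  void BeginBatch()
  {
    const size_t next = (m_current + 1) % NUM_BATCHES;
    if (m_batches[next].fence_counter > m_completed_counter)
      WaitFor(m_batches[next].fence_counter);
    m_batches[next].fence_counter = m_next_counter++;
    m_current = next;
  }

  // Analogue of WaitForFenceCounter(): find the first in-flight batch whose
  // counter covers the request, then retire everything up to it.
  void WaitFor(uint64_t counter)
  {
    if (m_completed_counter >= counter)
      return;

    size_t index = (m_current + 1) % NUM_BATCHES;
    while (index != m_current && m_batches[index].fence_counter < counter)
      index = (index + 1) % NUM_BATCHES;
    assert(index != m_current);

    // The real code waits on the batch's VkFence here before retiring.
    Retire(m_batches[index].fence_counter);
  }

private:
  void Retire(uint64_t now_completed)
  {
    // Only batches in (m_completed_counter, now_completed] are newly finished.
    for (Batch& batch : m_batches)
    {
      if (batch.fence_counter > m_completed_counter && batch.fence_counter <= now_completed)
      {
        for (auto& fn : batch.cleanups)
          fn();
        batch.cleanups.clear();
      }
    }
    m_completed_counter = now_completed;
  }

  std::array<Batch, NUM_BATCHES> m_batches{};
  size_t m_current = 0;
  uint64_t m_next_counter = 1;
  uint64_t m_completed_counter = 0;
};
```
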
@@ -263,16 +280,13 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
     }
   }
 
-  // This command buffer now has commands, so can't be re-used without waiting.
-  resources.needs_fence_wait = true;
-
   // Grab the semaphore before submitting command buffer either on-thread or off-thread.
   // This prevents a race from occurring where a second command buffer is executed
   // before the worker thread has woken and executed the first one yet.
   m_submit_semaphore.Wait();
 
   // Submitting off-thread?
-  if (m_use_threaded_submission && submit_on_worker_thread)
+  if (m_use_threaded_submission && submit_on_worker_thread && !wait_for_completion)
   {
     // Push to the pending submit queue.
     {
@@ -287,6 +301,8 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
   {
     // Pass through to normal submission path.
     SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index);
+    if (wait_for_completion)
+      WaitForCommandBufferCompletion(m_current_frame);
   }
 
   // Switch to next cmdbuffer.
@@ -365,39 +381,15 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
   m_submit_semaphore.Post();
 }
 
-void CommandBufferManager::OnCommandBufferExecuted(u32 index)
-{
-  FrameResources& resources = m_frame_resources[index];
-
-  // Fire fence tracking callbacks.
-  for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();)
-  {
-    auto backup_iter = iter++;
-    backup_iter->second(resources.fence);
-  }
-
-  // Clean up all objects pending destruction on this command buffer
-  for (auto& it : resources.cleanup_resources)
-    it();
-  resources.cleanup_resources.clear();
-}
-
 void CommandBufferManager::BeginCommandBuffer()
 {
   // Move to the next command buffer.
-  m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
-  FrameResources& resources = m_frame_resources[m_current_frame];
+  const u32 next_buffer_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
+  FrameResources& resources = m_frame_resources[next_buffer_index];
 
   // Wait for the GPU to finish with all resources for this command buffer.
-  if (resources.needs_fence_wait)
-  {
-    VkResult res =
-        vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, true, UINT64_MAX);
-    if (res != VK_SUCCESS)
-      LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
-
-    OnCommandBufferExecuted(m_current_frame);
-  }
+  if (resources.fence_counter > m_completed_fence_counter)
+    WaitForCommandBufferCompletion(next_buffer_index);
 
   // Reset fence to unsignaled before starting.
   VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence);
@@ -427,6 +419,8 @@ void CommandBufferManager::BeginCommandBuffer()
   // Reset upload command buffer state
   resources.init_command_buffer_used = false;
   resources.semaphore_used = false;
+  resources.fence_counter = m_next_fence_counter++;
+  m_current_frame = next_buffer_index;
 }
 
 void CommandBufferManager::DeferBufferDestruction(VkBuffer object)
@@ -471,19 +465,5 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object)
       [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); });
 }
 
-void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback)
-{
-  // Shouldn't be adding twice.
-  ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end());
-  m_fence_callbacks.emplace(key, std::move(callback));
-}
-
-void CommandBufferManager::RemoveFenceSignaledCallback(const void* key)
-{
-  auto iter = m_fence_callbacks.find(key);
-  ASSERT(iter != m_fence_callbacks.end());
-  m_fence_callbacks.erase(iter);
-}
-
 std::unique_ptr<CommandBufferManager> g_command_buffer_mgr;
 } // namespace Vulkan
 
@@ -51,9 +51,15 @@ public:
   // Allocates a descriptor set from the pool reserved for the current frame.
   VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout);
 
+  // Fence "counters" are used to track which commands have been completed by the GPU.
+  // If the last completed fence counter is greater than or equal to N, it means that the work
+  // associated with counter N has been completed by the GPU. The value of N to associate with
+  // commands can be retrieved by calling GetCurrentFenceCounter().
+  u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; }
+
   // Gets the fence that will be signaled when the currently executing command buffer is
   // queued and executed. Do not wait for this fence before the buffer is executed.
-  VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; }
+  u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; }
 
   // Returns the semaphore for the current command buffer, which can be used to ensure the
   // swap chain image is ready before the command buffer executes.
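
The counter API added above composes into a simple lifetime test for any GPU resource. A hypothetical caller (not part of this diff) would use it like this:

```cpp
// Stamp a resource whenever it is recorded into the current command buffer,
// then test or wait on that stamp instead of holding a VkFence handle.
struct TrackedResource
{
  u64 last_use_counter = 0;
};

void UseResource(TrackedResource& res)
{
  res.last_use_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
}

bool CanDestroyWithoutStall(const TrackedResource& res)
{
  return g_command_buffer_mgr->GetCompletedFenceCounter() >= res.last_use_counter;
}

void DestroyResource(TrackedResource& res)
{
  if (!CanDestroyWithoutStall(res))
    g_command_buffer_mgr->WaitForFenceCounter(res.last_use_counter);
  // ... free the underlying Vulkan object here ...
}
```
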
@@ -66,15 +72,11 @@ public:
   // Ensure that the worker thread has submitted any previous command buffers and is idle.
   void WaitForWorkerThreadIdle();
 
-  // Ensure that the worker thread has both submitted all commands, and the GPU has caught up.
-  // Use with caution, huge performance penalty.
-  void WaitForGPUIdle();
-
   // Wait for a fence to be completed.
   // Also invokes callbacks for completion.
-  void WaitForFence(VkFence fence);
+  void WaitForFenceCounter(u64 fence_counter);
 
-  void SubmitCommandBuffer(bool submit_on_worker_thread,
+  void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
                            VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE,
                            uint32_t present_image_index = 0xFFFFFFFF);
 
@@ -90,25 +92,17 @@ public:
   void DeferImageDestruction(VkImage object);
   void DeferImageViewDestruction(VkImageView object);
 
-  // Instruct the manager to fire the specified callback when a fence is flagged to be signaled.
-  // This happens when command buffers are executed, and can be tested if signaled, which means
-  // that all commands up to the point when the callback was fired have completed.
-  using FenceSignaledCallback = std::function<void(VkFence)>;
-  void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback);
-  void RemoveFenceSignaledCallback(const void* key);
-
 private:
   bool CreateCommandBuffers();
   void DestroyCommandBuffers();
 
   bool CreateSubmitThread();
 
+  void WaitForCommandBufferCompletion(u32 command_buffer_index);
   void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
                            u32 present_image_index);
   void BeginCommandBuffer();
 
-  void OnCommandBufferExecuted(u32 index);
-
   struct FrameResources
   {
     // [0] - Init (upload) command buffer, [1] - draw command buffer
@@ -117,19 +111,19 @@ private:
     VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
     VkFence fence = VK_NULL_HANDLE;
     VkSemaphore semaphore = VK_NULL_HANDLE;
+    u64 fence_counter = 0;
     bool init_command_buffer_used = false;
     bool semaphore_used = false;
-    bool needs_fence_wait = false;
 
     std::vector<std::function<void()>> cleanup_resources;
   };
 
+  u64 m_next_fence_counter = 1;
+  u64 m_completed_fence_counter = 0;
+
   std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources;
   u32 m_current_frame;
 
-  // callbacks when a fence point is set
-  std::map<const void*, FenceSignaledCallback> m_fence_callbacks;
-
   // Threaded command buffer execution
   // Semaphore determines when a command buffer can be queued
   Common::Semaphore m_submit_semaphore;
 
@@ -14,7 +14,6 @@
 #include "VideoBackends/Vulkan/CommandBufferManager.h"
 #include "VideoBackends/Vulkan/Renderer.h"
-#include "VideoBackends/Vulkan/StagingBuffer.h"
 #include "VideoBackends/Vulkan/StateTracker.h"
 #include "VideoBackends/Vulkan/VulkanContext.h"
 
@@ -24,17 +23,10 @@ PerfQuery::PerfQuery() = default;
 
 PerfQuery::~PerfQuery()
 {
-  g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-
   if (m_query_pool != VK_NULL_HANDLE)
     vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr);
 }
 
-Vulkan::PerfQuery* PerfQuery::GetInstance()
-{
-  return static_cast<PerfQuery*>(g_perf_query.get());
-}
-
 bool PerfQuery::Initialize()
 {
   if (!CreateQueryPool())
@@ -43,50 +35,30 @@ bool PerfQuery::Initialize()
     return false;
   }
 
-  if (!CreateReadbackBuffer())
-  {
-    PanicAlert("Failed to create readback buffer");
-    return false;
-  }
-
-  g_command_buffer_mgr->AddFenceSignaledCallback(
-      this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1));
-
   return true;
 }
 
 void PerfQuery::EnableQuery(PerfQueryGroup type)
 {
-  // Have we used half of the query buffer already?
-  if (m_query_count > m_query_buffer.size() / 2)
-    NonBlockingPartialFlush();
-
   // Block if there are no free slots.
-  if (m_query_count == PERF_QUERY_BUFFER_SIZE)
-  {
-    // ERROR_LOG(VIDEO, "Flushed query buffer early!");
-    BlockingPartialFlush();
-  }
+  // Otherwise, try to keep half of them available.
+  if (m_query_count > m_query_buffer.size() / 2)
+    PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);
 
   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
   {
-    u32 index = (m_query_read_pos + m_query_count) % PERF_QUERY_BUFFER_SIZE;
-    ActiveQuery& entry = m_query_buffer[index];
-    ASSERT(!entry.active && !entry.available);
-    entry.active = true;
-    m_query_count++;
-
-    DEBUG_LOG(VIDEO, "start query %u", index);
+    ActiveQuery& entry = m_query_buffer[m_query_next_pos];
+    DEBUG_ASSERT(!entry.has_value);
+    entry.has_value = true;
 
     // Use precise queries if supported, otherwise boolean (which will be incorrect).
-    VkQueryControlFlags flags = 0;
-    if (g_vulkan_context->SupportsPreciseOcclusionQueries())
-      flags = VK_QUERY_CONTROL_PRECISE_BIT;
+    VkQueryControlFlags flags =
+        g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0;
 
     // Ensure the query starts within a render pass.
-    // TODO: Is this needed?
     StateTracker::GetInstance()->BeginRenderPass();
-    vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index, flags);
+    vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos,
+                    flags);
   }
 }
 
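
The rewritten EnableQuery() folds the old NonBlockingPartialFlush()/BlockingPartialFlush() pair into one policy: start flushing once the ring is half full, and block only when every slot is occupied. Reduced to a sketch (hypothetical helper name; PartialFlush(bool) is the real method from this diff):

```cpp
// Sketch of the occupancy policy above: flush eagerly but without blocking
// once half the ring is in use, and block only when it is completely full,
// so steady-state rendering rarely stalls on query readback.
void EnsureQuerySpace(u32 query_count, u32 buffer_size)
{
  if (query_count > buffer_size / 2)
    PartialFlush(query_count == buffer_size);  // blocking only in the full case
}
```
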
@@ -94,16 +66,17 @@ void PerfQuery::DisableQuery(PerfQueryGroup type)
 {
   if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
   {
-    // DisableQuery should be called for each EnableQuery, so subtract one to get the previous one.
-    u32 index = (m_query_read_pos + m_query_count - 1) % PERF_QUERY_BUFFER_SIZE;
-    vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index);
+    vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
+    m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
+    m_query_count++;
   }
 }
 
 void PerfQuery::ResetQuery()
 {
   m_query_count = 0;
-  m_query_read_pos = 0;
+  m_query_readback_pos = 0;
+  m_query_next_pos = 0;
   std::fill_n(m_results, ArraySize(m_results), 0);
 
   // Reset entire query pool, ensuring all queries are ready to write to.
@@ -111,34 +84,20 @@ void PerfQuery::ResetQuery()
   vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0,
                       PERF_QUERY_BUFFER_SIZE);
 
-  for (auto& entry : m_query_buffer)
-  {
-    entry.pending_fence = VK_NULL_HANDLE;
-    entry.available = false;
-    entry.active = false;
-  }
+  std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size());
 }
 
 u32 PerfQuery::GetQueryResult(PerfQueryType type)
 {
   u32 result = 0;
-
   if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
     result = m_results[PQG_ZCOMP_ZCOMPLOC];
-  }
   else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
     result = m_results[PQG_ZCOMP];
-  }
   else if (type == PQ_BLEND_INPUT)
-  {
     result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
-  }
   else if (type == PQ_EFB_COPY_CLOCKS)
-  {
     result = m_results[PQG_EFB_COPY_CLOCKS];
-  }
 
   return result / 4;
 }
@@ -146,7 +105,7 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
 void PerfQuery::FlushResults()
 {
   while (!IsFlushed())
-    BlockingPartialFlush();
+    PartialFlush(true);
 }
 
 bool PerfQuery::IsFlushed() const
@@ -175,194 +134,79 @@ bool PerfQuery::CreateQueryPool()
   return true;
 }
 
-bool PerfQuery::CreateReadbackBuffer()
-{
-  m_readback_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_READBACK,
-                                            PERF_QUERY_BUFFER_SIZE * sizeof(PerfQueryDataType),
-                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT);
-
-  // Leave the buffer persistently mapped, we invalidate it when we need to read.
-  if (!m_readback_buffer || !m_readback_buffer->Map())
-    return false;
-
-  return true;
-}
-
-void PerfQuery::QueueCopyQueryResults(u32 start_index, u32 query_count)
-{
-  DEBUG_LOG(VIDEO, "queue copy of queries %u-%u", start_index, start_index + query_count - 1);
-
-  // Transition buffer for GPU write
-  // TODO: Is this needed?
-  m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(),
-                                        VK_ACCESS_TRANSFER_WRITE_BIT,
-                                        VK_PIPELINE_STAGE_TRANSFER_BIT);
-
-  // Copy from queries -> buffer
-  vkCmdCopyQueryPoolResults(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool,
-                            start_index, query_count, m_readback_buffer->GetBuffer(),
-                            start_index * sizeof(PerfQueryDataType), sizeof(PerfQueryDataType),
-                            VK_QUERY_RESULT_WAIT_BIT);
-
-  // Prepare for host readback
-  m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(),
-                                   VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
-
-  // Reset queries so they're ready to use again
-  vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, start_index,
-                      query_count);
-
-  // Flag all queries as available, but with a fence that has to be completed first
-  for (u32 i = 0; i < query_count; i++)
-  {
-    u32 index = start_index + i;
-    ActiveQuery& entry = m_query_buffer[index];
-    entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
-    entry.available = true;
-    entry.active = false;
-  }
-}
-
-void PerfQuery::FlushQueries()
-{
-  // Flag all pending queries that aren't available as available after execution.
-  u32 copy_start_index = 0;
-  u32 copy_count = 0;
-  for (u32 i = 0; i < m_query_count; i++)
-  {
-    u32 index = (m_query_read_pos + i) % PERF_QUERY_BUFFER_SIZE;
-    ActiveQuery& entry = m_query_buffer[index];
-
-    // Skip already-copied queries (will happen if a flush hasn't occurred and
-    // a command buffer hasn't finished executing).
-    if (entry.available)
-    {
-      // These should be grouped together, and at the start.
-      ASSERT(copy_count == 0);
-      continue;
-    }
-
-    // If this wrapped around, we need to flush the entries before the end of the buffer.
-    ASSERT(entry.active);
-    if (index < copy_start_index)
-    {
-      QueueCopyQueryResults(copy_start_index, copy_count);
-      copy_start_index = index;
-      copy_count = 0;
-    }
-    else if (copy_count == 0)
-    {
-      copy_start_index = index;
-    }
-    copy_count++;
-  }
-
-  if (copy_count > 0)
-    QueueCopyQueryResults(copy_start_index, copy_count);
-}
-
-void PerfQuery::OnFenceSignaled(VkFence fence)
+void PerfQuery::ReadbackQueries()
 {
+  const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
+
   // Need to save these since ProcessResults will modify them.
-  u32 query_read_pos = m_query_read_pos;
-  u32 query_count = m_query_count;
-
-  // Flush as many queries as are bound to this fence.
-  u32 flush_start_index = 0;
-  u32 flush_count = 0;
-  for (u32 i = 0; i < query_count; i++)
+  const u32 outstanding_queries = m_query_count;
+  u32 readback_count = 0;
+  for (u32 i = 0; i < outstanding_queries; i++)
   {
-    u32 index = (query_read_pos + i) % PERF_QUERY_BUFFER_SIZE;
-    if (m_query_buffer[index].pending_fence != fence)
-    {
-      // These should be grouped together, at the end.
+    u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
+    const ActiveQuery& entry = m_query_buffer[index];
+    if (entry.fence_counter > completed_fence_counter)
       break;
-    }
 
     // If this wrapped around, we need to flush the entries before the end of the buffer.
-    if (index < flush_start_index)
+    if (index < m_query_readback_pos)
     {
-      ProcessResults(flush_start_index, flush_count);
-      flush_start_index = index;
-      flush_count = 0;
+      ReadbackQueries(readback_count);
+      DEBUG_ASSERT(m_query_readback_pos == 0);
+      readback_count = 0;
     }
-    else if (flush_count == 0)
-    {
-      flush_start_index = index;
-    }
-    flush_count++;
+
+    readback_count++;
   }
 
-  if (flush_count > 0)
-    ProcessResults(flush_start_index, flush_count);
+  if (readback_count > 0)
+    ReadbackQueries(readback_count);
 }
 
-void PerfQuery::ProcessResults(u32 start_index, u32 query_count)
+void PerfQuery::ReadbackQueries(u32 query_count)
 {
-  // Invalidate CPU caches before reading back.
-  m_readback_buffer->InvalidateCPUCache(start_index * sizeof(PerfQueryDataType),
-                                        query_count * sizeof(PerfQueryDataType));
-
   // Should be at maximum query_count queries pending.
-  ASSERT(query_count <= m_query_count);
-  DEBUG_LOG(VIDEO, "process queries %u-%u", start_index, start_index + query_count - 1);
+  ASSERT(query_count <= m_query_count &&
+         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
+
+  // Read back from the GPU.
+  VkResult res =
+      vkGetQueryPoolResults(g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos,
+                            query_count, query_count * sizeof(PerfQueryDataType),
+                            m_query_result_buffer.data(), sizeof(PerfQueryDataType), 0);
+  if (res != VK_SUCCESS)
+    LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
 
   // Remove pending queries.
   for (u32 i = 0; i < query_count; i++)
   {
-    u32 index = (m_query_read_pos + i) % PERF_QUERY_BUFFER_SIZE;
+    u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE;
     ActiveQuery& entry = m_query_buffer[index];
 
     // Should have a fence associated with it (waiting for a result).
-    ASSERT(entry.pending_fence != VK_NULL_HANDLE);
-    entry.pending_fence = VK_NULL_HANDLE;
-    entry.available = false;
-    entry.active = false;
-
-    // Grab result from readback buffer, it will already have been invalidated.
-    u32 result;
-    m_readback_buffer->Read(index * sizeof(PerfQueryDataType), &result, sizeof(result), false);
-    DEBUG_LOG(VIDEO, "  query result %u", result);
+    DEBUG_ASSERT(entry.fence_counter != 0);
+    entry.fence_counter = 0;
+    entry.has_value = false;
 
     // NOTE: Reported pixel metrics should be referenced to native resolution
     m_results[entry.query_type] +=
-        static_cast<u32>(static_cast<u64>(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
-                         EFB_HEIGHT / g_renderer->GetTargetHeight());
+        static_cast<u32>(static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
+                         g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
   }
 
-  m_query_read_pos = (m_query_read_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
+  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
   m_query_count -= query_count;
 }
 
-void PerfQuery::NonBlockingPartialFlush()
+void PerfQuery::PartialFlush(bool blocking)
 {
-  if (IsFlushed())
-    return;
-
   // Submit a command buffer in the background if the front query is not bound to one.
-  // Ideally this will complete before the buffer fills.
-  if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE)
-    Renderer::GetInstance()->ExecuteCommandBuffer(true, false);
-}
-
-void PerfQuery::BlockingPartialFlush()
-{
-  if (IsFlushed())
-    return;
-
-  // If the first pending query is needing command buffer execution, do that.
-  ActiveQuery& entry = m_query_buffer[m_query_read_pos];
-  if (entry.pending_fence == VK_NULL_HANDLE)
+  if (blocking || m_query_buffer[m_query_readback_pos].fence_counter ==
+                      g_command_buffer_mgr->GetCurrentFenceCounter())
   {
-    // This will callback OnCommandBufferQueued which will set the fence on the entry.
-    // We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence.
-    Renderer::GetInstance()->ExecuteCommandBuffer(false, true);
-  }
-  else
-  {
-    // The command buffer has been submitted, but is awaiting completion.
-    // Wait for the fence to complete, which will call OnCommandBufferExecuted.
-    g_command_buffer_mgr->WaitForFence(entry.pending_fence);
+    Renderer::GetInstance()->ExecuteCommandBuffer(true, blocking);
   }
+
+  ReadbackQueries();
 }
 } // namespace Vulkan
 
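
The biggest simplification in this file: instead of recording vkCmdCopyQueryPoolResults into a persistently mapped staging buffer and invalidating CPU caches later, the new ReadbackQueries(u32) calls vkGetQueryPoolResults() on the host once the fence counter of the covering command buffer has completed. The call shape, as a standalone hedged sketch (device and query_pool are assumed locals, not from the diff):

```cpp
// Standalone sketch of the readback used above: one tightly packed uint32_t
// per query, and no VK_QUERY_RESULT_WAIT_BIT because the caller has already
// confirmed the command buffer covering these queries finished executing.
std::array<uint32_t, 512> results{};
VkResult res = vkGetQueryPoolResults(device, query_pool,
                                     /*firstQuery=*/0, /*queryCount=*/512,
                                     /*dataSize=*/results.size() * sizeof(uint32_t),
                                     results.data(),
                                     /*stride=*/sizeof(uint32_t), /*flags=*/0);
if (res != VK_SUCCESS)
  LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
```
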
@@ -13,18 +13,15 @@
 namespace Vulkan
 {
-class StagingBuffer;
-
 class PerfQuery : public PerfQueryBase
 {
 public:
   PerfQuery();
   ~PerfQuery();
 
-  static PerfQuery* GetInstance();
+  static PerfQuery* GetInstance() { return static_cast<PerfQuery*>(g_perf_query.get()); }
 
   bool Initialize();
-  void FlushQueries();
 
   void EnableQuery(PerfQueryGroup type) override;
   void DisableQuery(PerfQueryGroup type) override;
@@ -34,37 +31,30 @@ public:
   bool IsFlushed() const override;
 
 private:
-  struct ActiveQuery
-  {
-    PerfQueryType query_type;
-    VkFence pending_fence;
-    bool available;
-    bool active;
-  };
-
-  bool CreateQueryPool();
-  bool CreateReadbackBuffer();
-  void QueueCopyQueryResults(u32 start_index, u32 query_count);
-  void ProcessResults(u32 start_index, u32 query_count);
-
-  void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence);
-  void OnFenceSignaled(VkFence fence);
-
-  void NonBlockingPartialFlush();
-  void BlockingPartialFlush();
+  // u32 is used for the sample counts.
+  using PerfQueryDataType = u32;
 
   // when testing in SMS: 64 was too small, 128 was ok
   // TODO: This should be size_t, but the base class uses u32s
-  using PerfQueryDataType = u32;
   static const u32 PERF_QUERY_BUFFER_SIZE = 512;
-  std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer = {};
-  u32 m_query_read_pos = 0;
 
-  // TODO: Investigate using pipeline statistics to implement other query types
+  struct ActiveQuery
+  {
+    u64 fence_counter;
+    PerfQueryType query_type;
+    bool has_value;
+  };
+
+  bool CreateQueryPool();
+  void ReadbackQueries();
+  void ReadbackQueries(u32 query_count);
+  void PartialFlush(bool blocking);
+
   VkQueryPool m_query_pool = VK_NULL_HANDLE;
-
-  // Buffer containing query results. Each query is a u32.
-  std::unique_ptr<StagingBuffer> m_readback_buffer;
+  u32 m_query_readback_pos = 0;
+  u32 m_query_next_pos = 0;
+  std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer = {};
+  std::array<PerfQueryDataType, PERF_QUERY_BUFFER_SIZE> m_query_result_buffer = {};
 };
 
 } // namespace Vulkan
 
@@ -304,7 +304,6 @@ void Renderer::PresentBackbuffer()
 {
   // End drawing to backbuffer
   StateTracker::GetInstance()->EndRenderPass();
-  PerfQuery::GetInstance()->FlushQueries();
 
   // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing
   // to it have finished before present.
@@ -315,7 +314,7 @@ void Renderer::PresentBackbuffer()
   // Because this final command buffer is rendering to the swap chain, we need to wait for
   // the available semaphore to be signaled before executing the buffer. This final submission
   // can happen off-thread in the background while we're preparing the next frame.
-  g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetSwapChain(),
+  g_command_buffer_mgr->SubmitCommandBuffer(true, false, m_swap_chain->GetSwapChain(),
                                             m_swap_chain->GetCurrentImageIndex());
 
   // New cmdbuffer, so invalidate state.
@@ -325,13 +324,8 @@ void Renderer::PresentBackbuffer()
 void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion)
 {
   StateTracker::GetInstance()->EndRenderPass();
-  PerfQuery::GetInstance()->FlushQueries();
 
-  // If we're waiting for completion, don't bother waking the worker thread.
-  const VkFence pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
-  g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread && wait_for_completion);
-  if (wait_for_completion)
-    g_command_buffer_mgr->WaitForFence(pending_fence);
+  g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread, wait_for_completion);
 
   StateTracker::GetInstance()->InvalidateCachedState();
 }
@@ -550,10 +544,6 @@ void Renderer::UnbindTexture(const AbstractTexture* texture)
 
 void Renderer::ResetSamplerStates()
 {
-  // Ensure none of the sampler objects are in use.
-  // This assumes that none of the samplers are in use on the command list currently being recorded.
-  g_command_buffer_mgr->WaitForGPUIdle();
-
   // Invalidate all sampler states, next draw will re-initialize them.
   for (u32 i = 0; i < m_sampler_states.size(); i++)
   {
 
@@ -62,11 +62,13 @@ bool StateTracker::Initialize()
       VKTexture::Create(TextureConfig(1, 1, 1, 1, 1, AbstractTextureFormat::RGBA8, 0));
   if (!m_dummy_texture)
     return false;
+  m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
 
   // Initialize all samplers to point by default
   for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
   {
-    m_bindings.samplers[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    m_bindings.samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
     m_bindings.samplers[i].imageView = m_dummy_texture->GetView();
     m_bindings.samplers[i].sampler = g_object_cache->GetPointSampler();
   }
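
Transitioning the dummy texture once at startup lets every unused binding advertise VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: a descriptor's imageLayout must match the layout the image actually holds when sampled, and VK_IMAGE_LAYOUT_UNDEFINED is not a valid layout to sample from. A hedged sketch of the binding this enables (hypothetical helper, not from the diff):

```cpp
// Hypothetical helper: fill a descriptor for a binding that may never be
// sampled. The layout must still be one the image really holds, which the
// init-time transition above guarantees.
VkDescriptorImageInfo MakeDummyBinding(VkImageView view, VkSampler sampler)
{
  VkDescriptorImageInfo info = {};
  info.sampler = sampler;
  info.imageView = view;
  info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;  // matches the transition above
  return info;
}
```
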
@@ -223,14 +225,14 @@ void StateTracker::UnbindTexture(VkImageView view)
     if (it.imageView == view)
     {
       it.imageView = m_dummy_texture->GetView();
-      it.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+      it.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
     }
   }
 
   if (m_bindings.image_texture.imageView == view)
   {
     m_bindings.image_texture.imageView = m_dummy_texture->GetView();
-    m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
   }
 }
 
@@ -19,14 +19,10 @@ namespace Vulkan
 {
 StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size)
 {
-  g_command_buffer_mgr->AddFenceSignaledCallback(
-      this, std::bind(&StreamBuffer::OnFenceSignaled, this, std::placeholders::_1));
 }
 
 StreamBuffer::~StreamBuffer()
 {
-  g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-
   if (m_host_pointer)
     vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
 
@@ -189,8 +185,6 @@ bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
   // Can we find a fence to wait on that will give us enough memory?
   if (WaitForClearSpace(required_bytes))
   {
-    ASSERT(m_current_offset == m_current_gpu_position ||
-           (m_current_offset + required_bytes) < m_current_gpu_position);
     m_current_offset = Common::AlignUp(m_current_offset, alignment);
     m_last_allocation_size = num_bytes;
     return true;
@@ -225,36 +219,40 @@ void StreamBuffer::UpdateCurrentFencePosition()
     return;
 
   // Has the offset changed since the last fence?
-  const VkFence fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
-  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
+  const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter();
+  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
   {
     // Still haven't executed a command buffer, so just update the offset.
     m_tracked_fences.back().second = m_current_offset;
     return;
   }
 
-  m_tracked_fences.emplace_back(fence, m_current_offset);
+  // New buffer, so update the GPU position while we're at it.
+  UpdateGPUPosition();
+  m_tracked_fences.emplace_back(counter, m_current_offset);
 }
 
-void StreamBuffer::OnFenceSignaled(VkFence fence)
+void StreamBuffer::UpdateGPUPosition()
 {
-  // Locate the entry for this fence (if any, we may have been forced to wait already)
-  auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(),
-                           [fence](const auto& it) { return it.first == fence; });
+  auto start = m_tracked_fences.begin();
+  auto end = start;
 
-  if (iter != m_tracked_fences.end())
+  const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
+  while (end != m_tracked_fences.end() && completed_counter >= end->first)
   {
-    // Update the GPU position, and remove any fences before this fence (since
-    // it is implied that they have been signaled as well, though the callback
-    // should have removed them already).
-    m_current_gpu_position = iter->second;
-    m_tracked_fences.erase(m_tracked_fences.begin(), ++iter);
+    m_current_gpu_position = end->second;
+    ++end;
   }
+
+  if (start != end)
+    m_tracked_fences.erase(start, end);
 }
 
 bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
 {
   u32 new_offset = 0;
+  u32 new_gpu_position = 0;
 
   auto iter = m_tracked_fences.begin();
   for (; iter != m_tracked_fences.end(); iter++)
   {
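
UpdateGPUPosition() works because m_tracked_fences is appended in submission order, so its counters are non-decreasing and retiring reduces to a prefix scan. The same loop in isolation (a standalone sketch, with standard types in place of Dolphin's u64/u32 aliases):

```cpp
#include <cstdint>
#include <deque>
#include <utility>

void RetireTrackedFences(std::deque<std::pair<uint64_t, uint32_t>>& tracked,
                         uint64_t completed_counter, uint32_t& gpu_position)
{
  // Entries are appended in submission order, so counters never decrease:
  // pop every entry the GPU has finished and keep the newest position seen.
  while (!tracked.empty() && tracked.front().first <= completed_counter)
  {
    gpu_position = tracked.front().second;
    tracked.pop_front();
  }
}
```
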
@@ -265,20 +263,32 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
     u32 gpu_position = iter->second;
     if (m_current_offset == gpu_position)
     {
-      // Start at the start of the buffer again.
       new_offset = 0;
+      new_gpu_position = 0;
       break;
     }
 
     // Assuming that we wait for this fence, are we allocating in front of the GPU?
     if (m_current_offset > gpu_position)
     {
+      // This would suggest the GPU has now followed us and wrapped around, so we have from
+      // m_current_offset..m_size free, as well as 0..gpu_position.
+      const u32 remaining_space_after_offset = m_size - m_current_offset;
+      if (remaining_space_after_offset >= num_bytes)
+      {
+        // Switch to allocating in front of the GPU, using the remainder of the buffer.
+        new_offset = m_current_offset;
+        new_gpu_position = gpu_position;
+        break;
+      }
+
       // We can wrap around to the start, behind the GPU, if there is enough space.
       // We use > here because otherwise we'd end up lining up with the GPU, and then the
       // allocator would assume that the GPU has consumed what we just wrote.
       if (gpu_position > num_bytes)
       {
         new_offset = 0;
+        new_gpu_position = gpu_position;
         break;
       }
     }
@@ -292,6 +302,7 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
     {
       // Leave the offset as-is, but update the GPU position.
       new_offset = m_current_offset;
+      new_gpu_position = gpu_position;
       break;
     }
   }
@@ -300,14 +311,17 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
   // Did any fences satisfy this condition?
   // Has the command buffer been executed yet? If not, the caller should execute it.
   if (iter == m_tracked_fences.end() ||
-      iter->first == g_command_buffer_mgr->GetCurrentCommandBufferFence())
+      iter->first == g_command_buffer_mgr->GetCurrentFenceCounter())
   {
     return false;
   }
 
   // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
-  g_command_buffer_mgr->WaitForFence(iter->first);
+  g_command_buffer_mgr->WaitForFenceCounter(iter->first);
+  m_tracked_fences.erase(m_tracked_fences.begin(),
+                         m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
   m_current_offset = new_offset;
+  m_current_gpu_position = new_gpu_position;
   return true;
 }
 
@@ -34,7 +34,7 @@ public:
 private:
   bool AllocateBuffer();
   void UpdateCurrentFencePosition();
-  void OnFenceSignaled(VkFence fence);
+  void UpdateGPUPosition();
 
   // Waits for as many fences as needed to allocate num_bytes bytes from the buffer.
   bool WaitForClearSpace(u32 num_bytes);
@@ -50,7 +50,7 @@ private:
   u8* m_host_pointer = nullptr;
 
   // List of fences and the corresponding positions in the buffer
-  std::deque<std::pair<VkFence, u32>> m_tracked_fences;
+  std::deque<std::pair<u64, u32>> m_tracked_fences;
 
   bool m_coherent_mapping = false;
 };
 
@@ -674,11 +674,7 @@ VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig&
 {
 }
 
-VKStagingTexture::~VKStagingTexture()
-{
-  if (m_needs_flush)
-    VKStagingTexture::Flush();
-}
+VKStagingTexture::~VKStagingTexture() = default;
 
 std::unique_ptr<VKStagingTexture> VKStagingTexture::Create(StagingTextureType type,
                                                            const TextureConfig& config)
@@ -739,14 +735,6 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src,
   ASSERT(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= m_config.width &&
          dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= m_config.height);
 
-  if (m_needs_flush)
-  {
-    // Drop copy before reusing it.
-    g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-    m_flush_fence = VK_NULL_HANDLE;
-    m_needs_flush = false;
-  }
-
   StateTracker::GetInstance()->EndRenderPass();
 
   VkImageLayout old_layout = src_tex->GetLayout();
@@ -773,16 +761,7 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src,
   src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout);
 
   m_needs_flush = true;
-  m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
-  g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) {
-    if (m_flush_fence != fence)
-      return;
-
-    m_flush_fence = VK_NULL_HANDLE;
-    m_needs_flush = false;
-    g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-    m_staging_buffer->InvalidateCPUCache();
-  });
+  m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
 }
 
 void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
@@ -798,14 +777,6 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, A
   ASSERT(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= dst_tex->GetWidth() &&
          dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= dst_tex->GetHeight());

-  if (m_needs_flush)
-  {
-    // Drop copy before reusing it.
-    g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-    m_flush_fence = VK_NULL_HANDLE;
-    m_needs_flush = false;
-  }
-
   // Flush caches before copying.
   m_staging_buffer->FlushCPUCache();
   StateTracker::GetInstance()->EndRenderPass();

@@ -833,15 +804,7 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, A
   dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout);

   m_needs_flush = true;
-  m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
-  g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) {
-    if (m_flush_fence != fence)
-      return;
-
-    m_flush_fence = VK_NULL_HANDLE;
-    m_needs_flush = false;
-    g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-  });
+  m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
 }

 bool VKStagingTexture::Map()

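Both copy paths now end the same way: record the counter of the command buffer that carries the copy, and defer everything else to Flush(). A self-contained model of how such counters are issued at submit time (illustrative, not the real CommandBufferManager):

#include <cstdint>

// Illustrative model: one counter per submitted command buffer.
class CounterSource
{
public:
  // Counter of the command buffer currently being recorded.
  uint64_t GetCurrentFenceCounter() const { return m_current; }

  // Called once per submission; the returned value is what GPU-side
  // completion will eventually be compared against.
  uint64_t Submit() { return m_current++; }

private:
  uint64_t m_current = 1;  // 0 is reserved to mean "never submitted"
};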
@@ -860,25 +823,23 @@ void VKStagingTexture::Flush()
   if (!m_needs_flush)
     return;

-  // Either of the below two calls will cause the callback to fire.
-  g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
-  if (m_flush_fence == g_command_buffer_mgr->GetCurrentCommandBufferFence())
+  // Is this copy in the current command buffer?
+  if (g_command_buffer_mgr->GetCurrentFenceCounter() == m_flush_fence_counter)
   {
-    // The readback is in the current command buffer, and we must execute it.
+    // Execute the command buffer and wait for it to finish.
     Renderer::GetInstance()->ExecuteCommandBuffer(false, true);
   }
   else
   {
-    // WaitForFence should fire the callback.
-    g_command_buffer_mgr->WaitForFence(m_flush_fence);
+    // Wait for the GPU to finish with it.
+    g_command_buffer_mgr->WaitForFenceCounter(m_flush_fence_counter);
   }

-  DEBUG_ASSERT(m_flush_fence == VK_NULL_HANDLE);
-  m_needs_flush = false;
-
   // For readback textures, invalidate the CPU cache as there is new data there.
   if (m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable)
     m_staging_buffer->InvalidateCPUCache();
+
+  m_needs_flush = false;
 }

 VKFramebuffer::VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width,

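Flush() now needs only the counter comparison: equal means the copy is still in the unsubmitted current buffer, so it must be kicked; smaller means it is already in flight, so waiting suffices. A sketch of the wait side against the model above (illustrative):

#include <cstdint>
#include <deque>

// Illustrative wait-side model: walk in-flight submissions until the target
// counter is covered. A real implementation would vkWaitForFences() per entry.
struct InFlightQueue
{
  uint64_t completed = 0;          // highest counter known to be finished
  std::deque<uint64_t> in_flight;  // submission counters, oldest first

  void WaitForFenceCounter(uint64_t target)
  {
    while (completed < target && !in_flight.empty())
    {
      completed = in_flight.front();  // stand-in for the actual fence wait
      in_flight.pop_front();
    }
  }
};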
@@ -104,7 +104,7 @@ private:
                    std::unique_ptr<StagingBuffer> buffer);

   std::unique_ptr<StagingBuffer> m_staging_buffer;
-  VkFence m_flush_fence = VK_NULL_HANDLE;
+  u64 m_flush_fence_counter = 0;
 };

 class VKFramebuffer final : public AbstractFramebuffer

@@ -60,11 +60,11 @@ VertexManager::~VertexManager()
 bool VertexManager::Initialize()
 {
   m_vertex_stream_buffer =
-      StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE * 4);
+      StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE);
   m_index_stream_buffer =
-      StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 4);
+      StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE);
   m_uniform_stream_buffer =
-      StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 4);
+      StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE);
   if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer)
   {
     PanicAlert("Failed to allocate streaming buffers");

@@ -251,8 +251,8 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)

 void VideoBackend::Shutdown()
 {
-  if (g_command_buffer_mgr)
-    g_command_buffer_mgr->WaitForGPUIdle();
+  if (g_vulkan_context)
+    vkDeviceWaitIdle(g_vulkan_context->GetDevice());

   if (g_shader_cache)
     g_shader_cache->Shutdown();

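Waiting on the device rather than on the command buffer manager drops the shutdown dependency on g_command_buffer_mgr being alive. vkDeviceWaitIdle blocks until every queue on the device is idle, equivalent to calling vkQueueWaitIdle on each queue. A sketch of the call pattern, where 'device' stands in for g_vulkan_context->GetDevice():

#include <vulkan/vulkan.h>

// Device-wide quiesce before teardown.
void QuiesceForShutdown(VkDevice device)
{
  // Blocks until all queues on the device have finished all submitted work.
  const VkResult res = vkDeviceWaitIdle(device);
  if (res != VK_SUCCESS)
  {
    // VK_ERROR_DEVICE_LOST can surface here; during shutdown it is usually
    // safe to continue tearing down anyway.
  }
}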
@@ -96,6 +96,9 @@ TextureCacheBase::TextureCacheBase()

 TextureCacheBase::~TextureCacheBase()
 {
+  // Clear pending EFB copies first, so we don't try to flush them.
+  m_pending_efb_copies.clear();
+
   HiresTexture::Shutdown();
   Invalidate();
   Common::FreeAlignedMemory(temp);

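Clearing m_pending_efb_copies before anything else matters for destruction order: the later teardown steps would otherwise try to flush copies whose backing objects are in the middle of being destroyed. A hypothetical sketch of the hazard:

#include <functional>
#include <vector>

// Hypothetical sketch: deferred work queued against an object must be dropped,
// not executed, once that object's destructor has begun.
struct CacheLikeObject
{
  std::vector<std::function<void()>> pending;  // deferred copies capture state

  ~CacheLikeObject()
  {
    pending.clear();  // dropping the work is safe...
    // ...whereas flushing it here could dereference members that a derived
    // class (or an earlier teardown step) has already destroyed.
  }
};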