Merge pull request #4480 from stenzek/vulkan-streambuffer-reuse

Vulkan: Fix stream buffer re-use problems
This commit is contained in:
Stenzek 2016-12-01 12:38:31 +10:00 committed by GitHub
commit e2018f2208
9 changed files with 85 additions and 99 deletions

View File

@ -738,9 +738,7 @@ bool FramebufferManager::PopulateColorReadbackTexture()
}
// Wait until the copy is complete.
g_command_buffer_mgr->ExecuteCommandBuffer(false, true);
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
// Map to host memory.
if (!m_color_readback_texture->IsMapped() && !m_color_readback_texture->Map())
@ -822,9 +820,7 @@ bool FramebufferManager::PopulateDepthReadbackTexture()
}
// Wait until the copy is complete.
g_command_buffer_mgr->ExecuteCommandBuffer(false, true);
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
// Map to host memory.
if (!m_depth_readback_texture->IsMapped() && !m_depth_readback_texture->Map())
@ -1212,7 +1208,7 @@ void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, size_t
{
// Kick a command buffer first.
WARN_LOG(VIDEO, "Kicking command buffer due to no EFB poke space.");
Util::ExecuteCurrentCommandsAndRestoreState(true);
Util::ExecuteCurrentCommandsAndRestoreState(false);
command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true,

View File

@ -326,6 +326,7 @@ void Renderer::BeginFrame()
// Ensure that the state tracker rebinds everything, and allocates a new set
// of descriptors out of the next pool.
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->InvalidateConstants();
StateTracker::GetInstance()->SetPendingRebind();
}

View File

@ -357,20 +357,9 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type, DSTALPHA_MODE ds
void StateTracker::UpdateVertexShaderConstants()
{
if (!VertexShaderManager::dirty)
if (!VertexShaderManager::dirty || !ReserveConstantStorage())
return;
// Since the other stages uniform buffers' may be still be using the earlier data,
// we can't reuse the earlier part of the buffer without re-uploading everything.
if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size,
g_vulkan_context->GetUniformBufferAlignment(), false,
false, false))
{
// Re-upload all constants to a new portion of the buffer.
UploadAllConstants();
return;
}
// Buffer allocation changed?
if (m_uniform_stream_buffer->GetBuffer() !=
m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer)
@ -394,17 +383,9 @@ void StateTracker::UpdateVertexShaderConstants()
void StateTracker::UpdateGeometryShaderConstants()
{
// Skip updating geometry shader constants if it's not in use.
if (m_pipeline_state.gs == VK_NULL_HANDLE || !GeometryShaderManager::dirty)
return;
// Since the other stages uniform buffers' may be still be using the earlier data,
// we can't reuse the earlier part of the buffer without re-uploading everything.
if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size,
g_vulkan_context->GetUniformBufferAlignment(), false,
false, false))
if (m_pipeline_state.gs == VK_NULL_HANDLE || !GeometryShaderManager::dirty ||
!ReserveConstantStorage())
{
// Re-upload all constants to a new portion of the buffer.
UploadAllConstants();
return;
}
@ -430,20 +411,9 @@ void StateTracker::UpdateGeometryShaderConstants()
void StateTracker::UpdatePixelShaderConstants()
{
if (!PixelShaderManager::dirty)
if (!PixelShaderManager::dirty || !ReserveConstantStorage())
return;
// Since the other stages uniform buffers' may be still be using the earlier data,
// we can't reuse the earlier part of the buffer without re-uploading everything.
if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size,
g_vulkan_context->GetUniformBufferAlignment(), false,
false, false))
{
// Re-upload all constants to a new portion of the buffer.
UploadAllConstants();
return;
}
// Buffer allocation changed?
if (m_uniform_stream_buffer->GetBuffer() !=
m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer)
@ -464,33 +434,44 @@ void StateTracker::UpdatePixelShaderConstants()
PixelShaderManager::dirty = false;
}
bool StateTracker::ReserveConstantStorage()
{
// Since we invalidate all constants on command buffer execution, it doesn't matter if this
// causes the stream buffer to be resized.
if (m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size,
g_vulkan_context->GetUniformBufferAlignment(), true,
true, false))
{
return true;
}
// The only places that call constant updates are safe to have state restored.
WARN_LOG(VIDEO, "Executing command buffer while waiting for space in uniform buffer");
Util::ExecuteCurrentCommandsAndRestoreState(false);
// Since we are on a new command buffer, all constants have been invalidated, and we need
// to reupload them. We may as well do this now, since we're issuing a draw anyway.
UploadAllConstants();
return false;
}
void StateTracker::UploadAllConstants()
{
// We are free to re-use parts of the buffer now since we're uploading all constants.
size_t ub_alignment = g_vulkan_context->GetUniformBufferAlignment();
size_t pixel_constants_offset = 0;
size_t vertex_constants_offset =
Util::AlignValue(pixel_constants_offset + sizeof(PixelShaderConstants),
g_vulkan_context->GetUniformBufferAlignment());
Util::AlignValue(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment);
size_t geometry_constants_offset =
Util::AlignValue(vertex_constants_offset + sizeof(VertexShaderConstants),
g_vulkan_context->GetUniformBufferAlignment());
size_t total_allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants);
Util::AlignValue(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment);
size_t allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants);
// Allocate everything at once.
if (!m_uniform_stream_buffer->ReserveMemory(
total_allocation_size, g_vulkan_context->GetUniformBufferAlignment(), true, true, false))
// We should only be here if the buffer was full and a command buffer was submitted anyway.
if (!m_uniform_stream_buffer->ReserveMemory(allocation_size, ub_alignment, true, true, false))
{
// If this fails, wait until the GPU has caught up.
// The only places that call constant updates are safe to have state restored.
WARN_LOG(VIDEO, "Executing command list while waiting for space in uniform buffer");
Util::ExecuteCurrentCommandsAndRestoreState(false);
if (!m_uniform_stream_buffer->ReserveMemory(total_allocation_size,
g_vulkan_context->GetUniformBufferAlignment(), true,
true, false))
{
PanicAlert("Failed to allocate space for constants in streaming buffer");
return;
}
PanicAlert("Failed to allocate space for constants in streaming buffer");
return;
}
// Update bindings
@ -528,7 +509,7 @@ void StateTracker::UploadAllConstants()
&GeometryShaderManager::constants, sizeof(GeometryShaderConstants));
// Finally, flush buffer memory after copying
m_uniform_stream_buffer->CommitMemory(total_allocation_size);
m_uniform_stream_buffer->CommitMemory(allocation_size);
// Clear dirty flags
VertexShaderManager::dirty = false;
@ -616,6 +597,13 @@ void StateTracker::InvalidateDescriptorSets()
m_dirty_flags &= ~DIRTY_FLAG_PS_SSBO;
}
void StateTracker::InvalidateConstants()
{
VertexShaderManager::dirty = true;
GeometryShaderManager::dirty = true;
PixelShaderManager::dirty = true;
}
void StateTracker::SetPendingRebind()
{
m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_DESCRIPTOR_SET_BINDING |
@ -708,11 +696,7 @@ bool StateTracker::Bind(bool rebind_all /*= false*/)
{
// We can fail to allocate descriptors if we exhaust the pool for this command buffer.
WARN_LOG(VIDEO, "Failed to get a descriptor set, executing buffer");
// Try again after executing the current buffer.
g_command_buffer_mgr->ExecuteCommandBuffer(false, false);
InvalidateDescriptorSets();
SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(false, false);
if (!UpdateDescriptorSet())
{
// Something strange going on.
@ -775,10 +759,7 @@ void StateTracker::OnDraw()
m_scheduled_command_buffer_kicks.end(), m_draw_counter))
{
// Kick a command buffer on the background thread.
EndRenderPass();
g_command_buffer_mgr->ExecuteCommandBuffer(true, false);
InvalidateDescriptorSets();
SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(true);
}
}

View File

@ -76,6 +76,9 @@ public:
// now be in a different pool for the new command buffer.
void InvalidateDescriptorSets();
// Same with the uniforms, as the current storage will belong to the previous command buffer.
void InvalidateConstants();
// Set dirty flags on everything to force re-bind at next draw time.
void SetPendingRebind();
@ -171,6 +174,11 @@ private:
bool UpdatePipeline();
bool UpdateDescriptorSet();
// Allocates storage in the uniform buffer of the specified size. If this storage cannot be
// allocated immediately, the current command buffer will be submitted and all stage's
// constants will be re-uploaded. false will be returned in this case, otherwise true.
bool ReserveConstantStorage();
void UploadAllConstants();
// Which bindings/state has to be updated before the next draw.

View File

@ -286,8 +286,11 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes)
for (; iter != m_tracked_fences.end(); iter++)
{
// Would this fence bring us in line with the GPU?
// This is the "last resort" case, where a command buffer execution has been forced
// after no additional data has been written to it, so we can assume that after the
// fence has been signaled the entire buffer is now consumed.
size_t gpu_position = iter->second;
if (gpu_position == m_current_offset)
if (m_current_offset == gpu_position)
{
// Start at the start of the buffer again.
new_offset = 0;
@ -295,12 +298,12 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes)
break;
}
// We can wrap around to the start, behind the GPU, if there is enough space.
// We use > here because otherwise we'd end up lining up with the GPU, and then the
// allocator would assume that the GPU has consumed what we just wrote.
if (m_current_offset >= m_current_gpu_position)
// Assuming that we wait for this fence, are we allocating in front of the GPU?
if (m_current_offset > gpu_position)
{
// Wrap around to the start (behind the GPU) if there is sufficient space.
// We can wrap around to the start, behind the GPU, if there is enough space.
// We use > here because otherwise we'd end up lining up with the GPU, and then the
// allocator would assume that the GPU has consumed what we just wrote.
if (gpu_position > num_bytes)
{
new_offset = 0;
@ -310,19 +313,16 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes)
}
else
{
// We're currently allocating behind the GPU. Therefore, if this fence is behind us,
// and it's the last fence in the list (no data has been written after it), we can
// move back to allocating in front of the GPU.
if (gpu_position < m_current_offset)
// We're currently allocating behind the GPU. This would give us between the current
// offset and the GPU position worth of space to work with. Again, > because we can't
// align the GPU position with the buffer offset.
size_t available_space_inbetween = m_current_offset - gpu_position;
if (available_space_inbetween > num_bytes)
{
if (std::none_of(iter, m_tracked_fences.end(),
[gpu_position](const auto& it) { return it.second > gpu_position; }))
{
// Wait for this fence to complete, then allocate directly after it.
new_offset = gpu_position;
new_gpu_position = gpu_position;
break;
}
// Leave the offset as-is, but update the GPU position.
new_offset = m_current_offset;
new_gpu_position = gpu_position;
break;
}
}
}

View File

@ -145,10 +145,10 @@ void TextureCache::CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_
else
src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region);
// End render pass before barrier (since we have no self-dependencies)
// End render pass before barrier (since we have no self-dependencies).
// The barrier has to happen after the render pass, not inside it, as we are going to be
// reading from the texture immediately afterwards.
StateTracker::GetInstance()->EndRenderPass();
StateTracker::GetInstance()->SetPendingRebind();
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->OnReadback();
// Transition to shader resource before reading.
@ -666,9 +666,7 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
// Block until the GPU has finished copying to the staging texture.
g_command_buffer_mgr->ExecuteCommandBuffer(false, true);
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
// Map the staging texture so we can copy the contents out.
if (staging_texture->Map())

View File

@ -121,9 +121,7 @@ void TextureEncoder::EncodeTextureToRam(VkImageView src_texture, u8* dest_ptr, u
render_width, render_height, 0, 0);
// Block until the GPU has finished copying to the staging texture.
g_command_buffer_mgr->ExecuteCommandBuffer(false, true);
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->SetPendingRebind();
Util::ExecuteCurrentCommandsAndRestoreState(false, true);
// Copy from staging texture to the final destination, adjusting pitch if necessary.
m_download_texture->ReadTexels(0, 0, render_width, render_height, dest_ptr, memory_stride);

View File

@ -200,6 +200,7 @@ void ExecuteCurrentCommandsAndRestoreState(bool execute_off_thread, bool wait_fo
StateTracker::GetInstance()->EndRenderPass();
g_command_buffer_mgr->ExecuteCommandBuffer(execute_off_thread, wait_for_completion);
StateTracker::GetInstance()->InvalidateDescriptorSets();
StateTracker::GetInstance()->InvalidateConstants();
StateTracker::GetInstance()->SetPendingRebind();
}

View File

@ -130,9 +130,6 @@ void VertexManager::vFlush(bool use_dst_alpha)
static_cast<VertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
u32 vertex_stride = vertex_format->GetVertexStride();
// Commit memory to device
PrepareDrawBuffers(vertex_stride);
// Figure out the number of indices to draw
u32 index_count = IndexGenerator::GetIndexLen();
@ -169,6 +166,12 @@ void VertexManager::vFlush(bool use_dst_alpha)
StateTracker::GetInstance()->UpdateGeometryShaderConstants();
StateTracker::GetInstance()->UpdatePixelShaderConstants();
// Commit memory to device.
// NOTE: This must be done after constant upload, as a constant buffer overrun can cause
// the current command buffer to be executed, and we want the buffer space to be associated
// with the command buffer that has the corresponding draw.
PrepareDrawBuffers(vertex_stride);
// Flush all EFB pokes and invalidate the peek cache.
FramebufferManager::GetInstance()->InvalidatePeekCache();
FramebufferManager::GetInstance()->FlushEFBPokes();