Merge pull request #7953 from stenzek/lazy-d3d12-perf-queries

D3D12: Fix case where perf queries weren't flushed
This commit is contained in:
Connor McLaughlin 2019-04-01 21:40:20 +10:00 committed by GitHub
commit 8e1fb126d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 14 deletions

View File

@ -57,6 +57,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
PartialFlush(do_resolve, blocking); PartialFlush(do_resolve, blocking);
} }
// Ensure all state is applied before beginning the query.
// This is because we can't leave a query open when submitting a command list, and the draw
// call itself may need to execute a command list if we run out of descriptors. Note that
// this assumes that the caller has bound all required state prior to enabling the query.
Renderer::GetInstance()->ApplyState();
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{ {
ActiveQuery& entry = m_query_buffer[m_query_next_pos]; ActiveQuery& entry = m_query_buffer[m_query_next_pos];
@ -152,9 +158,9 @@ void PerfQuery::ResolveQueries(u32 query_count)
m_unresolved_queries -= query_count; m_unresolved_queries -= query_count;
} }
void PerfQuery::ReadbackQueries() void PerfQuery::ReadbackQueries(bool blocking)
{ {
const u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue(); u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();
// Need to save these since ProcessResults will modify them. // Need to save these since ProcessResults will modify them.
const u32 outstanding_queries = m_query_count; const u32 outstanding_queries = m_query_count;
@ -163,13 +169,24 @@ void PerfQuery::ReadbackQueries()
{ {
u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE; u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
const ActiveQuery& entry = m_query_buffer[index]; const ActiveQuery& entry = m_query_buffer[index];
if (!entry.resolved || entry.fence_value > completed_fence_counter) if (!entry.resolved)
break; break;
if (entry.fence_value > completed_fence_counter)
{
// Query result isn't ready yet. Wait for it if blocking; otherwise we can't read back any more yet.
if (!blocking)
break;
ASSERT(entry.fence_value != g_dx_context->GetCurrentFenceValue());
g_dx_context->WaitForFence(entry.fence_value);
completed_fence_counter = g_dx_context->GetCompletedFenceValue();
}
// If this wrapped around, we need to flush the entries before the end of the buffer. // If this wrapped around, we need to flush the entries before the end of the buffer.
if (index < m_query_readback_pos) if (index < m_query_readback_pos)
{ {
ReadbackQueries(readback_count); AccumulateQueriesFromBuffer(readback_count);
DEBUG_ASSERT(m_query_readback_pos == 0); DEBUG_ASSERT(m_query_readback_pos == 0);
readback_count = 0; readback_count = 0;
} }
@ -178,10 +195,10 @@ void PerfQuery::ReadbackQueries()
} }
if (readback_count > 0) if (readback_count > 0)
ReadbackQueries(readback_count); AccumulateQueriesFromBuffer(readback_count);
} }
void PerfQuery::ReadbackQueries(u32 query_count) void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
{ {
// Should be at maximum query_count queries pending. // Should be at maximum query_count queries pending.
ASSERT(query_count <= m_query_count && ASSERT(query_count <= m_query_count &&
@ -226,10 +243,10 @@ void PerfQuery::ReadbackQueries(u32 query_count)
void PerfQuery::PartialFlush(bool resolve, bool blocking) void PerfQuery::PartialFlush(bool resolve, bool blocking)
{ {
// Submit a command buffer in the background if the front query is not bound to one. // Submit a command buffer if there are unresolved queries (to write them to the buffer).
if ((resolve || blocking) && !m_query_buffer[m_query_resolve_pos].resolved) if (resolve && m_unresolved_queries > 0)
Renderer::GetInstance()->ExecuteCommandList(blocking); Renderer::GetInstance()->ExecuteCommandList(false);
ReadbackQueries(); ReadbackQueries(blocking);
} }
} // namespace DX12 } // namespace DX12

View File

@ -38,8 +38,8 @@ private:
}; };
void ResolveQueries(u32 query_count); void ResolveQueries(u32 query_count);
void ReadbackQueries(); void ReadbackQueries(bool blocking);
void ReadbackQueries(u32 query_count); void AccumulateQueriesFromBuffer(u32 query_count);
void PartialFlush(bool resolve, bool blocking); void PartialFlush(bool resolve, bool blocking);

View File

@ -89,6 +89,9 @@ public:
void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size); void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size);
void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format); void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format);
// Binds all dirty state
bool ApplyState();
protected: protected:
void OnConfigChanged(u32 bits) override; void OnConfigChanged(u32 bits) override;
@ -131,8 +134,6 @@ private:
void CheckForSwapChainChanges(); void CheckForSwapChainChanges();
// Binds all dirty state
bool ApplyState();
void BindFramebuffer(DXFramebuffer* fb); void BindFramebuffer(DXFramebuffer* fb);
void SetRootSignatures(); void SetRootSignatures();
void SetDescriptorHeaps(); void SetDescriptorHeaps();

View File

@ -45,6 +45,10 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
if (m_query_count > m_query_buffer.size() / 2) if (m_query_count > m_query_buffer.size() / 2)
PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE); PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);
// Ensure the command buffer is ready to go before beginning the query, so that we don't submit
// a buffer with open queries.
StateTracker::GetInstance()->Bind();
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
{ {
ActiveQuery& entry = m_query_buffer[m_query_next_pos]; ActiveQuery& entry = m_query_buffer[m_query_next_pos];

View File

@ -31,22 +31,31 @@ class PerfQueryBase
public: public:
PerfQueryBase() : m_query_count(0) {} PerfQueryBase() : m_query_count(0) {}
virtual ~PerfQueryBase() {} virtual ~PerfQueryBase() {}
// Checks if performance queries are enabled in the gameini configuration. // Checks if performance queries are enabled in the gameini configuration.
// NOTE: Called from CPU+GPU thread // NOTE: Called from CPU+GPU thread
static bool ShouldEmulate(); static bool ShouldEmulate();
// Begin querying the specified value for the following host GPU commands // Begin querying the specified value for the following host GPU commands
// The call to EnableQuery() should be placed immediately before the draw command; otherwise,
// there is a risk of GPU resets if the query is left open and the buffer is submitted during
// resource binding (D3D12/Vulkan).
virtual void EnableQuery(PerfQueryGroup type) {} virtual void EnableQuery(PerfQueryGroup type) {}
// Stop querying the specified value for the following host GPU commands // Stop querying the specified value for the following host GPU commands
virtual void DisableQuery(PerfQueryGroup type) {} virtual void DisableQuery(PerfQueryGroup type) {}
// Reset query counters to zero and drop any pending queries // Reset query counters to zero and drop any pending queries
virtual void ResetQuery() {} virtual void ResetQuery() {}
// Return the measured value for the specified query type // Return the measured value for the specified query type
// NOTE: Called from CPU thread // NOTE: Called from CPU thread
virtual u32 GetQueryResult(PerfQueryType type) { return 0; } virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
// Request the value of any pending queries - causes a pipeline flush and thus should be used // Request the value of any pending queries - causes a pipeline flush and thus should be used
// carefully! // carefully!
virtual void FlushResults() {} virtual void FlushResults() {}
// True if there are no further pending query results // True if there are no further pending query results
// NOTE: Called from CPU thread // NOTE: Called from CPU thread
virtual bool IsFlushed() const { return true; } virtual bool IsFlushed() const { return true; }