From c43ccc073da10fcf2ba43c4f90746cbc9b26880d Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 4 Dec 2019 21:42:26 +0300
Subject: [PATCH] [D3D12] Submit command lists on primary buffer end

---
 src/xenia/gpu/command_processor.cc             |  2 ++
 src/xenia/gpu/command_processor.h              |  1 +
 src/xenia/gpu/d3d12/d3d12_command_processor.cc | 15 +++++++++++++++
 src/xenia/gpu/d3d12/d3d12_command_processor.h  |  7 +++++++
 src/xenia/gpu/d3d12/pipeline_cache.cc          |  8 ++++++++
 src/xenia/gpu/d3d12/pipeline_cache.h           |  1 +
 6 files changed, 34 insertions(+)

diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc
index 51dcc9ac0..a66057e5b 100644
--- a/src/xenia/gpu/command_processor.cc
+++ b/src/xenia/gpu/command_processor.cc
@@ -464,6 +464,8 @@ uint32_t CommandProcessor::ExecutePrimaryBuffer(uint32_t read_index,
     }
   } while (reader.read_count());
 
+  OnPrimaryBufferEnd();
+
   trace_writer_.WritePrimaryBufferEnd();
 
   return write_index;
diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h
index 7f5cc5755..e613d5a03 100644
--- a/src/xenia/gpu/command_processor.h
+++ b/src/xenia/gpu/command_processor.h
@@ -177,6 +177,7 @@ class CommandProcessor {
                            uint32_t frontbuffer_height) = 0;
 
   uint32_t ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
+  virtual void OnPrimaryBufferEnd() {}
   void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
   bool ExecutePacket(RingBuffer* reader);
   bool ExecutePacketType0(RingBuffer* reader, uint32_t packet);
diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index 39aa3961e..484b48739 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -50,6 +50,10 @@ DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
             "path where available instead of centers (experimental, not very "
             "high-quality).",
             "D3D12");
+DEFINE_bool(d3d12_submit_on_primary_buffer_end, true,
+            "Submit the command list when a PM4 primary buffer ends if it's "
+            "possible to submit immediately to try to reduce frame latency.",
+            "D3D12");
 
 namespace xe {
 namespace gpu {
@@ -1222,6 +1226,13 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
   EndSubmission(true);
 }
 
+void D3D12CommandProcessor::OnPrimaryBufferEnd() {
+  if (cvars::d3d12_submit_on_primary_buffer_end && submission_open_ &&
+      CanEndSubmissionImmediately()) {
+    EndSubmission(false);
+  }
+}
+
 Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
                                           uint32_t guest_address,
                                           const uint32_t* host_address,
@@ -2073,6 +2084,10 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
   return true;
 }
 
+bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
+  return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
+}
+
 void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {
   // May be called if shutting down without everything set up.
   if ((submission_completed_ + 1) >= submission_current_ ||
diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h
index 25fbe4001..4a1050d61 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.h
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h
@@ -164,6 +164,8 @@ class D3D12CommandProcessor : public CommandProcessor {
   void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
                    uint32_t frontbuffer_height) override;
 
+  void OnPrimaryBufferEnd() override;
+
   Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
                      const uint32_t* host_address,
                      uint32_t dword_count) override;
@@ -235,6 +237,11 @@ class D3D12CommandProcessor : public CommandProcessor {
   // clearing and stopping capturing. Returns whether the submission was done
   // successfully, if it has failed, leaves it open.
   bool EndSubmission(bool is_swap);
+  // Checks if ending a submission right now would not cause potentially more
+  // delay than it would reduce by making the GPU start working earlier - such
+  // as when there are unfinished graphics pipeline state creation requests that
+  // would need to be fulfilled before actually submitting the command list.
+  bool CanEndSubmissionImmediately() const;
   void AwaitAllSubmissionsCompletion();
   // Need to await submission completion before calling.
   void ClearCommandAllocatorCache();
diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc
index ee13f0fab..f4f374e7c 100644
--- a/src/xenia/gpu/d3d12/pipeline_cache.cc
+++ b/src/xenia/gpu/d3d12/pipeline_cache.cc
@@ -172,6 +172,14 @@ void PipelineCache::EndSubmission() {
   }
 }
 
+bool PipelineCache::IsCreatingPipelines() {
+  if (creation_threads_.empty()) {
+    return false;
+  }
+  std::lock_guard lock(creation_request_lock_);
+  return !creation_queue_.empty() || creation_threads_busy_ != 0;
+}
+
 D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
                                        uint32_t guest_address,
                                        const uint32_t* host_address,
diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h
index dc0b03c00..fe1ff1bc1 100644
--- a/src/xenia/gpu/d3d12/pipeline_cache.h
+++ b/src/xenia/gpu/d3d12/pipeline_cache.h
@@ -43,6 +43,7 @@ class PipelineCache {
   void ClearCache();
 
   void EndSubmission();
+  bool IsCreatingPipelines();
 
   D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
                           const uint32_t* host_address, uint32_t dword_count);