diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index cc06fc612..a46128410 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -55,6 +55,7 @@ namespace xe { namespace gpu { namespace d3d12 { +constexpr uint32_t D3D12CommandProcessor::kQueuedFrames; constexpr uint32_t D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable; constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth; @@ -440,8 +441,8 @@ uint64_t D3D12CommandProcessor::RequestViewDescriptors( D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { uint32_t descriptor_index; uint64_t current_heap_index = view_heap_pool_->Request( - GetD3D12Context()->GetCurrentFrame(), previous_heap_index, - count_for_partial_update, count_for_full_update, descriptor_index); + fence_current_value_, previous_heap_index, count_for_partial_update, + count_for_full_update, descriptor_index); if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; @@ -466,8 +467,8 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { uint32_t descriptor_index; uint64_t current_heap_index = sampler_heap_pool_->Request( - GetD3D12Context()->GetCurrentFrame(), previous_heap_index, - count_for_partial_update, count_for_full_update, descriptor_index); + fence_current_value_, previous_heap_index, count_for_partial_update, + count_for_full_update, descriptor_index); if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; @@ -490,9 +491,9 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( uint32_t size, D3D12_RESOURCE_STATES state) { - assert_true(current_queue_frame_ != UINT_MAX); + assert_true(submission_open_); assert_false(scratch_buffer_used_); - if (current_queue_frame_ == UINT_MAX || scratch_buffer_used_ || size == 0) { + if (!submission_open_ || scratch_buffer_used_ || size == 0) { return nullptr; } @@ -505,8 +506,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( size = xe::align(size, kScratchBufferSizeIncrement); - auto context = GetD3D12Context(); - auto device = context->GetD3D12Provider()->GetDevice(); + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); @@ -520,7 +520,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( if (scratch_buffer_ != nullptr) { BufferForDeletion buffer_for_deletion; buffer_for_deletion.buffer = scratch_buffer_; - buffer_for_deletion.last_usage_frame = GetD3D12Context()->GetCurrentFrame(); + buffer_for_deletion.last_usage_fence_value = fence_current_value_; buffers_for_deletion_.push_back(buffer_for_deletion); } scratch_buffer_ = buffer; @@ -532,7 +532,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( void D3D12CommandProcessor::ReleaseScratchGPUBuffer( ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) { - assert_true(current_queue_frame_ != UINT_MAX); + assert_true(submission_open_); assert_true(scratch_buffer_used_); scratch_buffer_used_ = false; if (buffer == scratch_buffer_) { @@ -552,8 +552,10 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) { if (cvars::d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM()) { auto provider = GetD3D12Context()->GetD3D12Provider(); auto tier = provider->GetProgrammableSamplePositionsTier(); + uint32_t command_list_index = + uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames); if (tier >= 2 && - command_lists_[current_queue_frame_]->GetCommandList1() != nullptr) { + command_lists_[command_list_index]->GetCommandList1() != nullptr) { // Depth buffer transitions are affected by sample positions. SubmitBarriers(); // Standard sample positions in Direct3D 10.1, but adjusted to take the @@ -671,7 +673,7 @@ std::unique_ptr D3D12CommandProcessor::Capture() { PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); EndFrame(); - GetD3D12Context()->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); D3D12_RANGE readback_range; readback_range.Begin = swap_texture_copy_footprint_.Offset; readback_range.End = swap_texture_copy_size_; @@ -709,7 +711,20 @@ bool D3D12CommandProcessor::SetupContext() { auto device = provider->GetDevice(); auto direct_queue = provider->GetDirectQueue(); - for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&fence_)))) { + XELOGE("Failed to create the fence"); + return false; + } + fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); + if (fence_completion_event_ == nullptr) { + XELOGE("Failed to create the fence completion event"); + return false; + } + fence_current_value_ = 1; + fence_completed_value_ = 0; + + for (uint32_t i = 0; i < kQueuedFrames; ++i) { command_lists_[i] = ui::d3d12::CommandList::Create( device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT); if (command_lists_[i] == nullptr) { @@ -791,11 +806,11 @@ bool D3D12CommandProcessor::SetupContext() { return false; } // Get the layout for the upload buffer. - gamma_ramp_desc.DepthOrArraySize = ui::d3d12::D3D12Context::kQueuedFrames; + gamma_ramp_desc.DepthOrArraySize = kQueuedFrames; UINT64 gamma_ramp_upload_size; - device->GetCopyableFootprints( - &gamma_ramp_desc, 0, ui::d3d12::D3D12Context::kQueuedFrames * 2, 0, - gamma_ramp_footprints_, nullptr, nullptr, &gamma_ramp_upload_size); + device->GetCopyableFootprints(&gamma_ramp_desc, 0, kQueuedFrames * 2, 0, + gamma_ramp_footprints_, nullptr, nullptr, + &gamma_ramp_upload_size); // Create the upload buffer for the gamma ramp. ui::d3d12::util::FillBufferResourceDesc( gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); @@ -890,8 +905,7 @@ bool D3D12CommandProcessor::SetupContext() { } void D3D12CommandProcessor::ShutdownContext() { - auto context = GetD3D12Context(); - context->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); ui::d3d12::util::ReleaseAndNull(readback_buffer_); readback_buffer_size_ = 0; @@ -910,8 +924,8 @@ void D3D12CommandProcessor::ShutdownContext() { swap_state_.pending = false; swap_state_.front_buffer_texture = 0; } - auto graphics_system = static_cast(graphics_system_); - graphics_system->AwaitFrontBufferUnused(); + // TODO(Triang3l): Ensure this is synchronized. The display context may not + // exist at this point, so awaiting its fence doesn't always work. swap_texture_srv_descriptor_heap_->Release(); swap_texture_srv_descriptor_heap_ = nullptr; } @@ -950,10 +964,19 @@ void D3D12CommandProcessor::ShutdownContext() { shared_memory_.reset(); deferred_command_list_.reset(); - for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { + for (uint32_t i = 0; i < kQueuedFrames; ++i) { command_lists_[i].reset(); } + // First release the fence since it may reference the event. + ui::d3d12::util::ReleaseAndNull(fence_); + if (fence_completion_event_) { + CloseHandle(fence_completion_event_); + fence_completion_event_ = nullptr; + } + fence_current_value_ = 1; + fence_completed_value_ = 0; + CommandProcessor::ShutdownContext(); } @@ -962,7 +985,7 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { - if (current_queue_frame_ != UINT32_MAX) { + if (submission_open_) { uint32_t float_constant_index = (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; if (float_constant_index >= 256) { @@ -1013,9 +1036,11 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, auto device = provider->GetDevice(); // Upload the new gamma ramps. + uint32_t command_list_index = + uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames); if (dirty_gamma_ramp_normal_) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = - gamma_ramp_footprints_[current_queue_frame_ * 2]; + gamma_ramp_footprints_[command_list_index * 2]; volatile uint32_t* mapping = reinterpret_cast( gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset); for (uint32_t i = 0; i < 256; ++i) { @@ -1041,7 +1066,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, } if (dirty_gamma_ramp_pwl_) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = - gamma_ramp_footprints_[current_queue_frame_ * 2 + 1]; + gamma_ramp_footprints_[command_list_index * 2 + 1]; volatile uint32_t* mapping = reinterpret_cast( gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset); for (uint32_t i = 0; i < 128; ++i) { @@ -1153,7 +1178,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, if (cache_clear_requested_) { cache_clear_requested_ = false; - GetD3D12Context()->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; @@ -1191,8 +1216,7 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { - auto context = GetD3D12Context(); - auto device = context->GetD3D12Provider()->GetDevice(); + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES @@ -1685,7 +1709,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, readback_buffer_offset += memexport_range_size; } EndFrame(); - context->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); D3D12_RANGE readback_range; readback_range.Begin = 0; readback_range.End = memexport_total_size; @@ -1718,7 +1742,7 @@ void D3D12CommandProcessor::InitializeTrace() { anySubmitted |= shared_memory_->InitializeTraceSubmitDownloads(); if (anySubmitted) { EndFrame(); - GetD3D12Context()->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); shared_memory_->InitializeTraceCompleteDownloads(); } } @@ -1748,7 +1772,7 @@ bool D3D12CommandProcessor::IssueCopy() { readback_buffer, 0, shared_memory_buffer, written_address, written_length); EndFrame(); - GetD3D12Context()->AwaitAllFramesCompletion(); + AwaitAllSubmissionsCompletion(); D3D12_RANGE readback_range; readback_range.Begin = 0; readback_range.End = written_length; @@ -1766,7 +1790,7 @@ bool D3D12CommandProcessor::IssueCopy() { } bool D3D12CommandProcessor::BeginFrame() { - if (current_queue_frame_ != UINT32_MAX) { + if (submission_open_) { return false; } @@ -1774,17 +1798,23 @@ bool D3D12CommandProcessor::BeginFrame() { SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - auto context = GetD3D12Context(); - auto provider = context->GetD3D12Provider(); - context->BeginSwap(); - current_queue_frame_ = context->GetCurrentQueueFrame(); + submission_open_ = true; + + // Wait for a swap command list to become free. + // Command list 0 is used when fence_current_value_ is 1, 4, 7... + fence_completed_value_ = fence_->GetCompletedValue(); + if (fence_completed_value_ + kQueuedFrames < fence_current_value_) { + fence_->SetEventOnCompletion(fence_current_value_ - kQueuedFrames, + fence_completion_event_); + WaitForSingleObject(fence_completion_event_, INFINITE); + fence_completed_value_ = fence_->GetCompletedValue(); + } // Remove outdated temporary buffers. - uint64_t last_completed_frame = context->GetLastCompletedFrame(); auto erase_buffers_end = buffers_for_deletion_.begin(); while (erase_buffers_end != buffers_for_deletion_.end()) { - uint64_t upload_frame = erase_buffers_end->last_usage_frame; - if (upload_frame > last_completed_frame) { + uint64_t upload_fence_value = erase_buffers_end->last_usage_fence_value; + if (upload_fence_value > fence_completed_value_) { ++erase_buffers_end; break; } @@ -1830,16 +1860,17 @@ bool D3D12CommandProcessor::BeginFrame() { pix_capturing_ = pix_capture_requested_.exchange(false, std::memory_order_relaxed); if (pix_capturing_) { - IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis(); + IDXGraphicsAnalysis* graphics_analysis = + GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->BeginCapture(); } } deferred_command_list_->Reset(); - constant_buffer_pool_->Reclaim(last_completed_frame); - view_heap_pool_->Reclaim(last_completed_frame); - sampler_heap_pool_->Reclaim(last_completed_frame); + constant_buffer_pool_->Reclaim(fence_completed_value_); + view_heap_pool_->Reclaim(fence_completed_value_); + sampler_heap_pool_->Reclaim(fence_completed_value_); shared_memory_->BeginFrame(); @@ -1853,10 +1884,12 @@ bool D3D12CommandProcessor::BeginFrame() { } bool D3D12CommandProcessor::EndFrame() { - if (current_queue_frame_ == UINT32_MAX) { + if (!submission_open_) { return false; } + auto provider = GetD3D12Context()->GetD3D12Provider(); + assert_false(scratch_buffer_used_); pipeline_cache_->EndFrame(); @@ -1870,28 +1903,40 @@ bool D3D12CommandProcessor::EndFrame() { SubmitBarriers(); // Submit the command list. - auto current_command_list = command_lists_[current_queue_frame_].get(); + uint32_t command_list_index = + uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames); + auto current_command_list = command_lists_[command_list_index].get(); current_command_list->BeginRecording(); deferred_command_list_->Execute(current_command_list->GetCommandList(), current_command_list->GetCommandList1()); current_command_list->Execute(); if (pix_capturing_) { - IDXGraphicsAnalysis* graphics_analysis = - GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis(); + IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->EndCapture(); } pix_capturing_ = false; } - auto context = GetD3D12Context(); - context->EndSwap(); - current_queue_frame_ = UINT32_MAX; + provider->GetDirectQueue()->Signal(fence_, fence_current_value_++); + submission_open_ = false; return true; } +void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() { + // May be called if shutting down without everything set up. + if ((fence_completed_value_ + 1) >= fence_current_value_ || !fence_ || + GetD3D12Context()->WasLost()) { + return; + } + fence_->SetEventOnCompletion(fence_current_value_ - 1, + fence_completion_event_); + WaitForSingleObject(fence_completion_event_, INFINITE); + fence_completed_value_ = fence_current_value_ - 1; +} + void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { auto& regs = *register_file_; @@ -2626,10 +2671,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( bool D3D12CommandProcessor::UpdateBindings( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) { - auto context = GetD3D12Context(); - auto provider = context->GetD3D12Provider(); + auto provider = GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); - auto current_frame = context->GetCurrentFrame(); auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES @@ -2765,8 +2808,9 @@ bool D3D12CommandProcessor::UpdateBindings( // Update constant buffers. if (!cbuffer_bindings_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->Request( - current_frame, xe::align(uint32_t(sizeof(system_constants_)), 256u), - nullptr, nullptr, &cbuffer_bindings_system_.buffer_address); + fence_current_value_, + xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr, + &cbuffer_bindings_system_.buffer_address); if (system_constants == nullptr) { return false; } @@ -2777,7 +2821,7 @@ bool D3D12CommandProcessor::UpdateBindings( } if (!cbuffer_bindings_float_vertex_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( - current_frame, float_constant_size_vertex, nullptr, nullptr, + fence_current_value_, float_constant_size_vertex, nullptr, nullptr, &cbuffer_bindings_float_vertex_.buffer_address); if (float_constants == nullptr) { return false; @@ -2802,7 +2846,7 @@ bool D3D12CommandProcessor::UpdateBindings( } if (!cbuffer_bindings_float_pixel_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( - current_frame, float_constant_size_pixel, nullptr, nullptr, + fence_current_value_, float_constant_size_pixel, nullptr, nullptr, &cbuffer_bindings_float_pixel_.buffer_address); if (float_constants == nullptr) { return false; @@ -2832,7 +2876,7 @@ bool D3D12CommandProcessor::UpdateBindings( if (!cbuffer_bindings_bool_loop_.up_to_date) { uint32_t* bool_loop_constants = reinterpret_cast(constant_buffer_pool_->Request( - current_frame, 768, nullptr, nullptr, + fence_current_value_, 768, nullptr, nullptr, &cbuffer_bindings_bool_loop_.buffer_address)); if (bool_loop_constants == nullptr) { return false; @@ -2851,9 +2895,9 @@ bool D3D12CommandProcessor::UpdateBindings( write_bool_loop_constant_view = true; } if (!cbuffer_bindings_fetch_.up_to_date) { - uint8_t* fetch_constants = - constant_buffer_pool_->Request(current_frame, 768, nullptr, nullptr, - &cbuffer_bindings_fetch_.buffer_address); + uint8_t* fetch_constants = constant_buffer_pool_->Request( + fence_current_value_, 768, nullptr, nullptr, + &cbuffer_bindings_fetch_.buffer_address); if (fetch_constants == nullptr) { return false; } @@ -3200,8 +3244,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { } size = xe::align(size, kReadbackBufferSizeIncrement); if (size > readback_buffer_size_) { - auto context = GetD3D12Context(); - auto device = context->GetD3D12Provider()->GetDevice(); + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size, D3D12_RESOURCE_FLAG_NONE); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 070607d89..488d8a03f 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -63,6 +63,9 @@ class D3D12CommandProcessor : public CommandProcessor { // targets. bool IsROVUsedForEDRAM() const; + uint64_t GetCurrentFenceValue() const { return fence_current_value_; } + uint64_t GetCompletedFenceValue() const { return fence_completed_value_; } + // Gets the current color write mask, taking the pixel shader's write mask // into account. If a shader doesn't write to a render target, it shouldn't be // written to and it shouldn't be even bound - otherwise, in Halo 3, one @@ -167,6 +170,8 @@ class D3D12CommandProcessor : public CommandProcessor { void FinalizeTrace() override; private: + static constexpr uint32_t kQueuedFrames = 3; + enum RootParameter : UINT { // These are always present. @@ -215,6 +220,7 @@ class D3D12CommandProcessor : public CommandProcessor { bool BeginFrame(); // Returns true if an open frame was ended. bool EndFrame(); + void AwaitAllSubmissionsCompletion(); void UpdateFixedFunctionState(bool primitive_two_faced); void UpdateSystemConstantValues( @@ -239,8 +245,12 @@ class D3D12CommandProcessor : public CommandProcessor { bool cache_clear_requested_ = false; - std::unique_ptr - command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {}; + uint64_t fence_current_value_ = 1; + uint64_t fence_completed_value_ = 0; + HANDLE fence_completion_event_ = nullptr; + ID3D12Fence* fence_ = nullptr; + + std::unique_ptr command_lists_[kQueuedFrames] = {}; std::unique_ptr deferred_command_list_ = nullptr; std::unique_ptr shared_memory_ = nullptr; @@ -265,11 +275,10 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12Resource* gamma_ramp_texture_ = nullptr; D3D12_RESOURCE_STATES gamma_ramp_texture_state_; // Upload buffer for an image that is the same as gamma_ramp_, but with - // ui::d3d12::D3D12Context::kQueuedFrames array layers. + // kQueuedFrames array layers. ID3D12Resource* gamma_ramp_upload_ = nullptr; uint8_t* gamma_ramp_upload_mapping_ = nullptr; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT - gamma_ramp_footprints_[ui::d3d12::D3D12Context::kQueuedFrames * 2]; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_ramp_footprints_[kQueuedFrames * 2]; static constexpr uint32_t kSwapTextureWidth = 1280; static constexpr uint32_t kSwapTextureHeight = 720; @@ -291,7 +300,7 @@ class D3D12CommandProcessor : public CommandProcessor { struct BufferForDeletion { ID3D12Resource* buffer; - uint64_t last_usage_frame; + uint64_t last_usage_fence_value; }; std::deque buffers_for_deletion_; @@ -305,7 +314,7 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12Resource* readback_buffer_ = nullptr; uint32_t readback_buffer_size_ = 0; - uint32_t current_queue_frame_ = UINT32_MAX; + bool submission_open_ = false; std::atomic pix_capture_requested_ = false; bool pix_capturing_; diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 30afeac08..efba957b3 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -199,12 +199,6 @@ std::unique_ptr D3D12GraphicsSystem::Capture() { return d3d12_command_processor->Capture(); } -void D3D12GraphicsSystem::AwaitFrontBufferUnused() { - if (display_context_ != nullptr) { - display_context_->AwaitAllFramesCompletion(); - } -} - void D3D12GraphicsSystem::StretchTextureToFrontBuffer( D3D12_GPU_DESCRIPTOR_HANDLE handle, D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size, diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.h b/src/xenia/gpu/d3d12/d3d12_graphics_system.h index dbd6efa03..93c585406 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.h +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.h @@ -36,8 +36,6 @@ class D3D12GraphicsSystem : public GraphicsSystem { std::unique_ptr Capture() override; - void AwaitFrontBufferUnused(); - // Draws a texture covering the entire viewport to the render target currently // bound on the specified command list (in D3D12Context::kSwapChainFormat). // This changes the current pipeline, graphics root signature and primitive diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 8842d7618..b714096d1 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -112,7 +112,7 @@ bool PrimitiveConverter::Initialize() { } static_ib_upload_->Unmap(0, nullptr); // Not uploaded yet. - static_ib_upload_frame_ = UINT64_MAX; + static_ib_upload_fence_value_ = UINT64_MAX; if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, @@ -143,27 +143,27 @@ void PrimitiveConverter::Shutdown() { void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); } void PrimitiveConverter::BeginFrame() { + uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue(); // Got a command list now - upload and transition the static index buffer if // needed. if (static_ib_upload_ != nullptr) { - auto context = command_processor_->GetD3D12Context(); - if (static_ib_upload_frame_ == UINT64_MAX) { + if (static_ib_upload_fence_value_ == UINT64_MAX) { // Not uploaded yet - upload. command_processor_->GetDeferredCommandList()->D3DCopyResource( static_ib_, static_ib_upload_); command_processor_->PushTransitionBarrier( static_ib_, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_INDEX_BUFFER); - static_ib_upload_frame_ = context->GetCurrentFrame(); - } else if (context->GetLastCompletedFrame() >= static_ib_upload_frame_) { + static_ib_upload_fence_value_ = + command_processor_->GetCurrentFenceValue(); + } else if (completed_fence_value >= static_ib_upload_fence_value_) { // Completely uploaded - release the upload buffer. static_ib_upload_->Release(); static_ib_upload_ = nullptr; } } - buffer_pool_->Reclaim( - command_processor_->GetD3D12Context()->GetLastCompletedFrame()); + buffer_pool_->Reclaim(completed_fence_value); converted_indices_cache_.clear(); memory_regions_used_ = 0; @@ -694,9 +694,9 @@ void* PrimitiveConverter::AllocateIndices( size += 16; } D3D12_GPU_VIRTUAL_ADDRESS gpu_address; - uint8_t* mapping = buffer_pool_->Request( - command_processor_->GetD3D12Context()->GetCurrentFrame(), size, nullptr, - nullptr, &gpu_address); + uint8_t* mapping = + buffer_pool_->Request(command_processor_->GetCurrentFenceValue(), size, + nullptr, nullptr, &gpu_address); if (mapping == nullptr) { XELOGE("Failed to allocate space for %u converted %u-bit vertex indices", count, format == IndexFormat::kInt32 ? 32 : 16); diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index eafc456bd..0da47ccc5 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -111,7 +111,7 @@ class PrimitiveConverter { // CPU-side, used only for uploading - destroyed once the copy commands have // been completed. ID3D12Resource* static_ib_upload_ = nullptr; - uint64_t static_ib_upload_frame_; + uint64_t static_ib_upload_fence_value_; // GPU-side - used for drawing. ID3D12Resource* static_ib_ = nullptr; D3D12_GPU_VIRTUAL_ADDRESS static_ib_gpu_address_; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 84e0ce637..e2e52e1ba 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -167,8 +167,7 @@ void SharedMemory::Shutdown() { } void SharedMemory::BeginFrame() { - upload_buffer_pool_->Reclaim( - command_processor_->GetD3D12Context()->GetLastCompletedFrame()); + upload_buffer_pool_->Reclaim(command_processor_->GetCompletedFenceValue()); heap_creation_failed_ = false; } @@ -329,8 +328,8 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { UINT range_tile_count = kHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; // FIXME(Triang3l): This may cause issues if the emulator is shut down // mid-frame and the heaps are destroyed before tile mappings are updated - // (AwaitAllFramesCompletion won't catch this then). Defer this until the - // actual command list submission at the end of the frame. + // (awaiting the fence won't catch this then). Defer this until the actual + // command list submission at the end of the frame. direct_queue->UpdateTileMappings( buffer_, 1, ®ion_start_coordinates, ®ion_size, heaps_[i], 1, &range_flags, &heap_range_start_offset, &range_tile_count, @@ -376,7 +375,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { ID3D12Resource* upload_buffer; uint32_t upload_buffer_offset, upload_buffer_size; uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( - command_processor_->GetD3D12Context()->GetCurrentFrame(), + command_processor_->GetCurrentFenceValue(), upload_range_length << page_size_log2_, &upload_buffer, &upload_buffer_offset, &upload_buffer_size, nullptr); if (upload_buffer_mapping == nullptr) { diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index f9ee6a705..74fe4211d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -1169,8 +1169,7 @@ void TextureCache::BeginFrame() { texture_current_usage_time_ = xe::Clock::QueryHostUptimeMillis(); // If memory usage is too high, destroy unused textures. - uint64_t last_completed_frame = - command_processor_->GetD3D12Context()->GetLastCompletedFrame(); + uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue(); uint32_t limit_soft_mb = cvars::d3d12_texture_cache_limit_soft; uint32_t limit_hard_mb = cvars::d3d12_texture_cache_limit_hard; if (IsResolutionScale2X()) { @@ -1187,7 +1186,7 @@ void TextureCache::BeginFrame() { break; } Texture* texture = texture_used_first_; - if (texture->last_usage_frame > last_completed_frame) { + if (texture->last_usage_fence_value > completed_fence_value) { break; } if (!limit_hard_exceeded && @@ -1956,8 +1955,8 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled, kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; // FIXME(Triang3l): This may cause issues if the emulator is shut down // mid-frame and the heaps are destroyed before tile mappings are updated - // (AwaitAllFramesCompletion won't catch this then). Defer this until the - // actual command list submission at the end of the frame. + // (awaiting the fence won't catch this then). Defer this until the actual + // command list submission. direct_queue->UpdateTileMappings( scaled_resolve_buffer_, 1, ®ion_start_coordinates, ®ion_size, scaled_resolve_heaps_[i], 1, &range_flags, &heap_range_start_offset, @@ -2293,8 +2292,8 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { // Untiling through a buffer instead of using unordered access because copying // is not done that often. desc.Flags = D3D12_RESOURCE_FLAG_NONE; - auto context = command_processor_->GetD3D12Context(); - auto device = context->GetD3D12Provider()->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); // Assuming untiling will be the next operation. D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST; ID3D12Resource* resource; @@ -2312,7 +2311,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { texture->resource_size = device->GetResourceAllocationInfo(0, 1, &desc).SizeInBytes; texture->state = state; - texture->last_usage_frame = context->GetCurrentFrame(); + texture->last_usage_fence_value = command_processor_->GetCurrentFenceValue(); texture->last_usage_time = texture_current_usage_time_; texture->used_previous = texture_used_last_; texture->used_next = nullptr; @@ -2406,8 +2405,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { } auto command_list = command_processor_->GetDeferredCommandList(); - auto context = command_processor_->GetD3D12Context(); - auto provider = context->GetD3D12Provider(); + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); // Get the pipeline. @@ -2608,7 +2606,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { } D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; uint8_t* cbuffer_mapping = cbuffer_pool->Request( - context->GetCurrentFrame(), + command_processor_->GetCurrentFenceValue(), xe::align(uint32_t(sizeof(load_constants)), 256u), nullptr, nullptr, &cbuffer_gpu_address); if (cbuffer_mapping == nullptr) { @@ -2686,11 +2684,10 @@ bool TextureCache::LoadTextureData(Texture* texture) { } void TextureCache::MarkTextureUsed(Texture* texture) { - uint64_t current_frame = - command_processor_->GetD3D12Context()->GetCurrentFrame(); + uint64_t current_fence_value = command_processor_->GetCurrentFenceValue(); // This is called very frequently, don't relink unless needed for caching. - if (texture->last_usage_frame != current_frame) { - texture->last_usage_frame = current_frame; + if (texture->last_usage_fence_value != current_fence_value) { + texture->last_usage_fence_value = current_fence_value; texture->last_usage_time = texture_current_usage_time_; if (texture->used_next == nullptr) { // Simplify the code a bit - already in the end of the list. diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 9690a50fb..be6ccf4cf 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -343,7 +343,7 @@ class TextureCache { uint64_t resource_size; D3D12_RESOURCE_STATES state; - uint64_t last_usage_frame; + uint64_t last_usage_fence_value; uint64_t last_usage_time; Texture* used_previous; Texture* used_next; diff --git a/src/xenia/ui/d3d12/cpu_fence.cc b/src/xenia/ui/d3d12/cpu_fence.cc deleted file mode 100644 index a3095959d..000000000 --- a/src/xenia/ui/d3d12/cpu_fence.cc +++ /dev/null @@ -1,75 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/d3d12/cpu_fence.h" - -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace d3d12 { - -std::unique_ptr CPUFence::Create(ID3D12Device* device, - ID3D12CommandQueue* queue) { - std::unique_ptr fence(new CPUFence(device, queue)); - if (!fence->Initialize()) { - return nullptr; - } - return fence; -} - -CPUFence::CPUFence(ID3D12Device* device, ID3D12CommandQueue* queue) - : device_(device), queue_(queue) {} - -CPUFence::~CPUFence() { - // First destroying the fence because it may reference the event. - if (fence_ != nullptr) { - fence_->Release(); - } - if (completion_event_ != nullptr) { - CloseHandle(completion_event_); - } -} - -bool CPUFence::Initialize() { - if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&fence_)))) { - XELOGE("Failed to create a fence"); - return false; - } - completion_event_ = CreateEvent(nullptr, false, false, nullptr); - if (completion_event_ == nullptr) { - XELOGE("Failed to create a fence completion event"); - fence_->Release(); - fence_ = nullptr; - return false; - } - queued_value_ = 0; - return true; -} - -void CPUFence::Enqueue() { - ++queued_value_; - queue_->Signal(fence_, queued_value_); -} - -bool CPUFence::IsCompleted() { - return fence_->GetCompletedValue() >= queued_value_; -} - -void CPUFence::Await() { - if (fence_->GetCompletedValue() < queued_value_) { - fence_->SetEventOnCompletion(queued_value_, completion_event_); - WaitForSingleObject(completion_event_, INFINITE); - } -} - -} // namespace d3d12 -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/d3d12/cpu_fence.h b/src/xenia/ui/d3d12/cpu_fence.h deleted file mode 100644 index 647eb5c47..000000000 --- a/src/xenia/ui/d3d12/cpu_fence.h +++ /dev/null @@ -1,52 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_D3D12_CPU_FENCE_H_ -#define XENIA_UI_D3D12_CPU_FENCE_H_ - -#include - -#include "xenia/ui/d3d12/d3d12_api.h" - -namespace xe { -namespace ui { -namespace d3d12 { - -class CPUFence { - public: - ~CPUFence(); - - static std::unique_ptr Create(ID3D12Device* device, - ID3D12CommandQueue* queue); - - // Submits the fence to the GPU command queue. - void Enqueue(); - - // Immediately returns whether the GPU has reached the fence. - bool IsCompleted(); - // Blocks until the fence has been reached. - void Await(); - - private: - CPUFence(ID3D12Device* device, ID3D12CommandQueue* queue); - bool Initialize(); - - ID3D12Device* device_; - ID3D12CommandQueue* queue_; - - ID3D12Fence* fence_ = nullptr; - HANDLE completion_event_ = nullptr; - uint64_t queued_value_ = 0; -}; - -} // namespace d3d12 -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_D3D12_CPU_FENCE_H_ diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index fa3664eb8..9117a5298 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -16,6 +16,7 @@ #include "xenia/base/math.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" #include "xenia/ui/d3d12/d3d12_provider.h" +#include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/window.h" DEFINE_bool(d3d12_random_clear_color, false, @@ -25,6 +26,9 @@ namespace xe { namespace ui { namespace d3d12 { +constexpr uint32_t D3D12Context::kSwapCommandListCount; +constexpr uint32_t D3D12Context::kSwapChainBufferCount; + D3D12Context::D3D12Context(D3D12Provider* provider, Window* target_window) : GraphicsContext(provider, target_window) {} @@ -38,23 +42,24 @@ bool D3D12Context::Initialize() { context_lost_ = false; - current_frame_ = 1; - // No frames have been completed yet. - last_completed_frame_ = 0; - // Keep in sync with the modulo because why not. - current_queue_frame_ = 1; - - // Create fences for synchronization of reuse and destruction of transient - // objects (like command lists) and for global shutdown. - for (uint32_t i = 0; i < kQueuedFrames; ++i) { - fences_[i] = CPUFence::Create(device, direct_queue); - if (fences_[i] == nullptr) { + if (target_window_) { + swap_fence_current_value_ = 1; + swap_fence_completed_value_ = 0; + swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); + if (swap_fence_completion_event_ == nullptr) { + XELOGE("Failed to create the composition fence completion event"); + Shutdown(); + return false; + } + // Create a fence for transient resources of compositing. + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&swap_fence_)))) { + XELOGE("Failed to create the composition fence"); Shutdown(); return false; } - } - if (target_window_) { + // Create the swap chain. swap_chain_width_ = target_window_->scaled_width(); swap_chain_height_ = target_window_->scaled_height(); DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; @@ -109,7 +114,7 @@ bool D3D12Context::Initialize() { } // Create command lists for compositing. - for (uint32_t i = 0; i < kQueuedFrames; ++i) { + for (uint32_t i = 0; i < kSwapCommandListCount; ++i) { swap_command_lists_[i] = CommandList::Create( device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT); if (swap_command_lists_[i] == nullptr) { @@ -126,7 +131,6 @@ bool D3D12Context::Initialize() { } } - initialized_fully_ = true; return true; } @@ -159,29 +163,30 @@ bool D3D12Context::InitializeSwapChainBuffers() { } void D3D12Context::Shutdown() { - if (initialized_fully_ && !context_lost_) { - AwaitAllFramesCompletion(); + if (!context_lost_ && swap_fence_ && + swap_fence_->GetCompletedValue() + 1 < swap_fence_current_value_) { + swap_fence_->SetEventOnCompletion(swap_fence_current_value_ - 1, + swap_fence_completion_event_); + WaitForSingleObject(swap_fence_completion_event_, INFINITE); } - initialized_fully_ = false; - immediate_drawer_.reset(); - if (swap_chain_ != nullptr) { - for (uint32_t i = 0; i < kQueuedFrames; ++i) { - swap_command_lists_[i].reset(); - } + for (uint32_t i = 0; i < kSwapCommandListCount; ++i) { + swap_command_lists_[i].reset(); + } + if (swap_chain_) { for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) { - auto& buffer = swap_chain_buffers_[i]; - if (buffer == nullptr) { + auto& swap_chain_buffer = swap_chain_buffers_[i]; + if (!swap_chain_buffer) { break; } - buffer->Release(); - buffer = nullptr; + swap_chain_buffer->Release(); + swap_chain_buffer = nullptr; } - if (swap_chain_rtv_heap_ != nullptr) { + if (swap_chain_rtv_heap_) { swap_chain_rtv_heap_->Release(); swap_chain_rtv_heap_ = nullptr; } @@ -189,9 +194,14 @@ void D3D12Context::Shutdown() { swap_chain_->Release(); } - for (uint32_t i = 0; i < kQueuedFrames; ++i) { - fences_[i].reset(); + // First release the fence since it may reference the event. + util::ReleaseAndNull(swap_fence_); + if (swap_fence_completion_event_) { + CloseHandle(swap_fence_completion_event_); + swap_fence_completion_event_ = nullptr; } + swap_fence_current_value_ = 1; + swap_fence_completed_value_ = 0; } ImmediateDrawer* D3D12Context::immediate_drawer() { @@ -205,119 +215,125 @@ bool D3D12Context::MakeCurrent() { return true; } void D3D12Context::ClearCurrent() {} void D3D12Context::BeginSwap() { - if (context_lost_) { + if (!target_window_ || context_lost_) { return; } - // Await the availability of transient objects for the new frame. - // The frame number is incremented in EndSwap so it can be treated the same - // way both when inside a frame and when outside of it (it's tied to actual - // submissions). - fences_[current_queue_frame_]->Await(); - // Update the completed frame if didn't explicitly await all queued frames. - if (last_completed_frame_ + kQueuedFrames < current_frame_) { - last_completed_frame_ = current_frame_ - kQueuedFrames; - } - - if (target_window_ != nullptr) { - // Resize the swap chain if the window is resized. - uint32_t target_window_width = target_window_->scaled_width(); - uint32_t target_window_height = target_window_->scaled_height(); - if (swap_chain_width_ != target_window_width || - swap_chain_height_ != target_window_height) { - // Await the completion of swap chain use. - // Context loss is also faked if resizing fails. In this case, before the - // context is shut down to be recreated, frame completion must be awaited - // (this isn't done if the context is truly lost). - AwaitAllFramesCompletion(); - // All buffer references must be released before resizing. - for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) { - swap_chain_buffers_[i]->Release(); - swap_chain_buffers_[i] = nullptr; - } - if (FAILED(swap_chain_->ResizeBuffers( - kSwapChainBufferCount, target_window_width, target_window_height, - kSwapChainFormat, 0))) { - context_lost_ = true; - return; - } - swap_chain_width_ = target_window_width; - swap_chain_height_ = target_window_height; - if (!InitializeSwapChainBuffers()) { - context_lost_ = true; - return; - } + // Resize the swap chain if the window is resized. + uint32_t target_window_width = target_window_->scaled_width(); + uint32_t target_window_height = target_window_->scaled_height(); + if (swap_chain_width_ != target_window_width || + swap_chain_height_ != target_window_height) { + // Await the completion of swap chain use. + // Context loss is also faked if resizing fails. In this case, before the + // context is shut down to be recreated, frame completion must be awaited + // (this isn't done if the context is truly lost). + if (swap_fence_completed_value_ + 1 < swap_fence_current_value_) { + swap_fence_->SetEventOnCompletion(swap_fence_current_value_ - 1, + swap_fence_completion_event_); + WaitForSingleObject(swap_fence_completion_event_, INFINITE); + swap_fence_completed_value_ = swap_fence_current_value_ - 1; } - - // Bind the back buffer as a render target and clear it. - auto command_list = swap_command_lists_[current_queue_frame_].get(); - auto graphics_command_list = command_list->BeginRecording(); - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = - swap_chain_buffers_[swap_chain_back_buffer_index_]; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - graphics_command_list->ResourceBarrier(1, &barrier); - D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV(); - graphics_command_list->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, - nullptr); - float clear_color[4]; - if (cvars::d3d12_random_clear_color) { - clear_color[0] = - rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - clear_color[1] = 1.0f; - clear_color[2] = 0.0f; - } else { - clear_color[0] = 238.0f / 255.0f; - clear_color[1] = 238.0f / 255.0f; - clear_color[2] = 238.0f / 255.0f; + // All buffer references must be released before resizing. + for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) { + swap_chain_buffers_[i]->Release(); + swap_chain_buffers_[i] = nullptr; } - clear_color[3] = 1.0f; - graphics_command_list->ClearRenderTargetView(back_buffer_rtv, clear_color, - 0, nullptr); - } -} - -void D3D12Context::EndSwap() { - if (context_lost_) { - return; - } - - if (target_window_ != nullptr) { - // Switch the back buffer to presentation state. - auto command_list = swap_command_lists_[current_queue_frame_].get(); - auto graphics_command_list = command_list->GetCommandList(); - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = - swap_chain_buffers_[swap_chain_back_buffer_index_]; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - graphics_command_list->ResourceBarrier(1, &barrier); - command_list->Execute(); - // Present and check if the context was lost. - HRESULT result = swap_chain_->Present(0, 0); - if (result == DXGI_ERROR_DEVICE_RESET || - result == DXGI_ERROR_DEVICE_REMOVED) { + if (FAILED(swap_chain_->ResizeBuffers( + kSwapChainBufferCount, target_window_width, target_window_height, + kSwapChainFormat, 0))) { + context_lost_ = true; + return; + } + swap_chain_width_ = target_window_width; + swap_chain_height_ = target_window_height; + if (!InitializeSwapChainBuffers()) { context_lost_ = true; return; } - // Get the back buffer index for the next frame. - swap_chain_back_buffer_index_ = swap_chain_->GetCurrentBackBufferIndex(); } - // Go to the next transient object frame. - fences_[current_queue_frame_]->Enqueue(); - ++current_queue_frame_; - if (current_queue_frame_ >= kQueuedFrames) { - current_queue_frame_ -= kQueuedFrames; + // Wait for a swap command list to become free. + // Command list 0 is used when swap_fence_current_value_ is 1, 4, 7... + swap_fence_completed_value_ = swap_fence_->GetCompletedValue(); + if (swap_fence_completed_value_ + kSwapCommandListCount < + swap_fence_current_value_) { + swap_fence_->SetEventOnCompletion( + swap_fence_current_value_ - kSwapCommandListCount, + swap_fence_completion_event_); + WaitForSingleObject(swap_fence_completion_event_, INFINITE); + swap_fence_completed_value_ = swap_fence_->GetCompletedValue(); } - ++current_frame_; + + // Bind the back buffer as a render target and clear it. + uint32_t command_list_index = + uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) % + kSwapCommandListCount); + auto command_list = swap_command_lists_[command_list_index].get(); + auto graphics_command_list = command_list->BeginRecording(); + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = + swap_chain_buffers_[swap_chain_back_buffer_index_]; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + graphics_command_list->ResourceBarrier(1, &barrier); + D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV(); + graphics_command_list->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, nullptr); + float clear_color[4]; + if (cvars::d3d12_random_clear_color) { + clear_color[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + clear_color[1] = 1.0f; + clear_color[2] = 0.0f; + } else { + clear_color[0] = 238.0f / 255.0f; + clear_color[1] = 238.0f / 255.0f; + clear_color[2] = 238.0f / 255.0f; + } + clear_color[3] = 1.0f; + graphics_command_list->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, + nullptr); +} + +void D3D12Context::EndSwap() { + if (!target_window_ || context_lost_) { + return; + } + + // Switch the back buffer to presentation state. + uint32_t command_list_index = + uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) % + kSwapCommandListCount); + auto command_list = swap_command_lists_[command_list_index].get(); + auto graphics_command_list = command_list->GetCommandList(); + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = + swap_chain_buffers_[swap_chain_back_buffer_index_]; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + graphics_command_list->ResourceBarrier(1, &barrier); + + command_list->Execute(); + + // Present and check if the context was lost. + HRESULT result = swap_chain_->Present(0, 0); + if (result == DXGI_ERROR_DEVICE_RESET || + result == DXGI_ERROR_DEVICE_REMOVED) { + context_lost_ = true; + return; + } + + // Signal the fence to wait for frame resources to become free again. + GetD3D12Provider()->GetDirectQueue()->Signal(swap_fence_, + swap_fence_current_value_++); + + // Get the back buffer index for the next frame. + swap_chain_back_buffer_index_ = swap_chain_->GetCurrentBackBufferIndex(); } std::unique_ptr D3D12Context::Capture() { @@ -325,19 +341,6 @@ std::unique_ptr D3D12Context::Capture() { return nullptr; } -void D3D12Context::AwaitAllFramesCompletion() { - // Await the last frame since previous frames must be completed before it. - if (context_lost_) { - return; - } - uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1); - if (await_frame >= kQueuedFrames) { - await_frame -= kQueuedFrames; - } - fences_[await_frame]->Await(); - last_completed_frame_ = current_frame_ - 1; -} - D3D12_CPU_DESCRIPTOR_HANDLE D3D12Context::GetSwapChainBufferRTV( uint32_t buffer_index) const { return GetD3D12Provider()->OffsetRTVDescriptor(swap_chain_rtv_heap_start_, diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index a72ff35fd..bd26c3a40 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -13,7 +13,6 @@ #include #include "xenia/ui/d3d12/command_list.h" -#include "xenia/ui/d3d12/cpu_fence.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" #include "xenia/ui/d3d12/d3d12_provider.h" #include "xenia/ui/graphics_context.h" @@ -45,16 +44,6 @@ class D3D12Context : public GraphicsContext { return static_cast(provider_); } - // The count of copies of transient objects (like command lists, dynamic - // descriptor heaps) that must be kept when rendering with this context. - static constexpr uint32_t kQueuedFrames = 3; - // The current absolute frame number. - uint64_t GetCurrentFrame() { return current_frame_; } - // The last completed frame - it's fine to destroy objects used in it. - uint64_t GetLastCompletedFrame() { return last_completed_frame_; } - uint32_t GetCurrentQueueFrame() { return current_queue_frame_; } - void AwaitAllFramesCompletion(); - static constexpr DXGI_FORMAT kSwapChainFormat = DXGI_FORMAT_R8G8B8A8_UNORM; ID3D12Resource* GetSwapChainBuffer(uint32_t buffer_index) const { return swap_chain_buffers_[buffer_index]; @@ -71,8 +60,18 @@ class D3D12Context : public GraphicsContext { width = swap_chain_width_; height = swap_chain_height_; } + // Inside the current BeginSwap/EndSwap pair. + uint64_t GetSwapCurrentFenceValue() const { + return swap_fence_current_value_; + } + uint64_t GetSwapCompletedFenceValue() const { + return swap_fence_completed_value_; + } ID3D12GraphicsCommandList* GetSwapCommandList() const { - return swap_command_lists_[current_queue_frame_]->GetCommandList(); + uint32_t command_list_index = + uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) % + kSwapCommandListCount); + return swap_command_lists_[command_list_index]->GetCommandList(); } private: @@ -85,15 +84,8 @@ class D3D12Context : public GraphicsContext { bool InitializeSwapChainBuffers(); void Shutdown(); - bool initialized_fully_ = false; - bool context_lost_ = false; - uint64_t current_frame_ = 1; - uint64_t last_completed_frame_ = 0; - uint32_t current_queue_frame_ = 1; - std::unique_ptr fences_[kQueuedFrames] = {}; - static constexpr uint32_t kSwapChainBufferCount = 3; IDXGISwapChain3* swap_chain_ = nullptr; uint32_t swap_chain_width_ = 0, swap_chain_height_ = 0; @@ -101,7 +93,17 @@ class D3D12Context : public GraphicsContext { uint32_t swap_chain_back_buffer_index_ = 0; ID3D12DescriptorHeap* swap_chain_rtv_heap_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE swap_chain_rtv_heap_start_; - std::unique_ptr swap_command_lists_[kQueuedFrames] = {}; + + uint64_t swap_fence_current_value_ = 1; + uint64_t swap_fence_completed_value_ = 0; + HANDLE swap_fence_completion_event_ = nullptr; + ID3D12Fence* swap_fence_ = nullptr; + + static constexpr uint32_t kSwapCommandListCount = 3; + std::unique_ptr swap_command_lists_[kSwapCommandListCount] = {}; + // Current is + // ((swap_fence_current_value_ + (kSwapCommandListCount - 1))) % + // kSwapCommandListCount. std::unique_ptr immediate_drawer_ = nullptr; }; diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index e89017157..8d321d3e4 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -399,7 +399,7 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, &location_source, nullptr); SubmittedTextureUpload submitted_upload; submitted_upload.buffer = buffer; - submitted_upload.frame = context_->GetCurrentFrame(); + submitted_upload.fence_value = context_->GetSwapCurrentFenceValue(); texture_uploads_submitted_.push_back(submitted_upload); } else { // Defer uploading to the next frame when there's a command list. @@ -417,14 +417,14 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, // Use the compositing command list. current_command_list_ = context_->GetSwapCommandList(); - uint64_t current_frame = context_->GetCurrentFrame(); - uint64_t last_completed_frame = context_->GetLastCompletedFrame(); + uint64_t completed_fence_value = context_->GetSwapCompletedFenceValue(); + uint64_t current_fence_value = context_->GetSwapCurrentFenceValue(); // Remove temporary buffers for completed texture uploads. auto erase_uploads_end = texture_uploads_submitted_.begin(); while (erase_uploads_end != texture_uploads_submitted_.end()) { - uint64_t upload_frame = erase_uploads_end->frame; - if (upload_frame > last_completed_frame) { + uint64_t upload_fence_value = erase_uploads_end->fence_value; + if (upload_fence_value > completed_fence_value) { ++erase_uploads_end; break; } @@ -456,13 +456,13 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, &location_source, nullptr); SubmittedTextureUpload submitted_upload; submitted_upload.buffer = pending_upload.buffer; - submitted_upload.frame = current_frame; + submitted_upload.fence_value = current_fence_value; texture_uploads_submitted_.push_back(submitted_upload); texture_uploads_pending_.pop_back(); } - vertex_buffer_pool_->Reclaim(last_completed_frame); - texture_descriptor_pool_->Reclaim(last_completed_frame); + vertex_buffer_pool_->Reclaim(completed_fence_value); + texture_descriptor_pool_->Reclaim(completed_fence_value); texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; current_render_target_width_ = render_target_width; @@ -493,6 +493,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { if (current_command_list_ == nullptr) { return; } + uint64_t current_fence_value = context_->GetSwapCurrentFenceValue(); batch_open_ = false; @@ -502,8 +503,8 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { vertex_buffer_view.SizeInBytes = batch.vertex_count * uint32_t(sizeof(ImmediateVertex)); void* vertex_buffer_mapping = vertex_buffer_pool_->Request( - context_->GetCurrentFrame(), vertex_buffer_view.SizeInBytes, nullptr, - nullptr, &vertex_buffer_view.BufferLocation); + current_fence_value, vertex_buffer_view.SizeInBytes, nullptr, nullptr, + &vertex_buffer_view.BufferLocation); if (vertex_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for %u vertices in the immediate drawer", batch.vertex_count); @@ -520,7 +521,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t); index_buffer_view.Format = DXGI_FORMAT_R16_UINT; void* index_buffer_mapping = vertex_buffer_pool_->Request( - context_->GetCurrentFrame(), + current_fence_value, xe::align(index_buffer_view.SizeInBytes, UINT(sizeof(uint32_t))), nullptr, nullptr, &index_buffer_view.BufferLocation); if (index_buffer_mapping == nullptr) { @@ -563,7 +564,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { bool bind_texture = current_texture_ != texture; uint32_t texture_descriptor_index; uint64_t texture_heap_index = texture_descriptor_pool_->Request( - context_->GetCurrentFrame(), texture_descriptor_pool_heap_index_, + context_->GetSwapCurrentFenceValue(), texture_descriptor_pool_heap_index_, bind_texture ? 1 : 0, 1, texture_descriptor_index); if (texture_heap_index == DescriptorHeapPool::kHeapIndexInvalid) { return; @@ -674,9 +675,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { void D3D12ImmediateDrawer::EndDrawBatch() { batch_open_ = false; } -void D3D12ImmediateDrawer::End() { - current_command_list_ = nullptr; -} +void D3D12ImmediateDrawer::End() { current_command_list_ = nullptr; } } // namespace d3d12 } // namespace ui diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h index 6003f611e..1958768e3 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h @@ -87,7 +87,7 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { struct SubmittedTextureUpload { ID3D12Resource* buffer; - uint64_t frame; + uint64_t fence_value; }; std::deque texture_uploads_submitted_; diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index 2b2a608dd..2b5275fbd 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -22,9 +22,7 @@ namespace d3d12 { UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size) : device_(device), page_size_(page_size) {} -UploadBufferPool::~UploadBufferPool() { - ClearCache(); -} +UploadBufferPool::~UploadBufferPool() { ClearCache(); } void UploadBufferPool::Reclaim(uint64_t completed_fence_value) { while (submitted_first_) { @@ -172,9 +170,7 @@ DescriptorHeapPool::DescriptorHeapPool(ID3D12Device* device, uint32_t page_size) : device_(device), type_(type), page_size_(page_size) {} -DescriptorHeapPool::~DescriptorHeapPool() { - ClearCache(); -} +DescriptorHeapPool::~DescriptorHeapPool() { ClearCache(); } void DescriptorHeapPool::Reclaim(uint64_t completed_fence_value) { while (submitted_first_) {