diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 9e70e7652..223f1ef64 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -42,6 +42,48 @@ ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList() return command_lists_[current_queue_frame_]->GetCommandList(); } +void D3D12CommandProcessor::PushTransitionBarrier( + ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, + D3D12_RESOURCE_STATES new_state, UINT subresource) { + if (old_state == new_state) { + return; + } + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = resource; + barrier.Transition.Subresource = subresource; + barrier.Transition.StateBefore = old_state; + barrier.Transition.StateAfter = new_state; + barriers_.push_back(barrier); +} + +void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource, + ID3D12Resource* new_resource) { + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Aliasing.pResourceBefore = old_resource; + barrier.Aliasing.pResourceAfter = new_resource; + barriers_.push_back(barrier); +} + +void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) { + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.UAV.pResource = resource; + barriers_.push_back(barrier); +} + +void D3D12CommandProcessor::SubmitBarriers() { + UINT barrier_count = UINT(barriers_.size()); + if (barrier_count != 0) { + GetCurrentCommandList()->ResourceBarrier(barrier_count, barriers_.data()); + barriers_.clear(); + } +} + ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { assert_true(vertex_shader->is_translated()); @@ -372,17 +414,8 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( } if (size <= scratch_buffer_size_) { - if (scratch_buffer_state_ != state) { - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = scratch_buffer_; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = scratch_buffer_state_; - barrier.Transition.StateAfter = state; - GetCurrentCommandList()->ResourceBarrier(1, &barrier); - scratch_buffer_state_ = state; - } + PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state); + scratch_buffer_state_ = state; scratch_buffer_used_ = true; return scratch_buffer_; } @@ -470,7 +503,7 @@ bool D3D12CommandProcessor::SetupContext() { sampler_heap_pool_ = std::make_unique( context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048); - shared_memory_ = std::make_unique(memory_, context); + shared_memory_ = std::make_unique(this, memory_); if (!shared_memory_->Initialize()) { XELOGE("Failed to initialize shared memory"); return false; @@ -764,7 +797,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, } shared_memory_->RequestRange( regs[vfetch_constant_index].u32 & 0x1FFFFFFC, - regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC, command_list); + regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); } if (indexed) { @@ -774,9 +807,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, : sizeof(uint16_t); index_base &= ~(index_size - 1); uint32_t index_buffer_size = index_buffer_info->count * index_size; - shared_memory_->RequestRange(index_base, index_buffer_size, command_list); + shared_memory_->RequestRange(index_base, index_buffer_size); - shared_memory_->UseForReading(command_list); + shared_memory_->UseForReading(); D3D12_INDEX_BUFFER_VIEW index_buffer_view; index_buffer_view.BufferLocation = shared_memory_->GetGPUAddress() + index_base; @@ -785,9 +818,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; command_list->IASetIndexBuffer(&index_buffer_view); + SubmitBarriers(); command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0); } else { - shared_memory_->UseForReading(command_list); + shared_memory_->UseForReading(); + SubmitBarriers(); command_list->DrawInstanced(index_count, 1, 0, 0); } @@ -873,6 +908,9 @@ bool D3D12CommandProcessor::EndFrame() { shared_memory_->EndFrame(); + // Submit barriers now because resources the queued barriers are for may be + // destroyed between frames. + SubmitBarriers(); command_lists_[current_queue_frame_]->Execute(); sampler_heap_pool_->EndFrame(); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index cbcfe3b27..038ef1c3b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -47,6 +47,15 @@ class D3D12CommandProcessor : public CommandProcessor { // Returns the drawing command list for the currently open frame. ID3D12GraphicsCommandList* GetCurrentCommandList() const; + void PushTransitionBarrier( + ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, + D3D12_RESOURCE_STATES new_state, + UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + void PushAliasingBarrier(ID3D12Resource* old_resource, + ID3D12Resource* new_resource); + void PushUAVBarrier(ID3D12Resource* resource); + void SubmitBarriers(); + // Finds or creates root signature for a pipeline. ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader); @@ -176,6 +185,9 @@ class D3D12CommandProcessor : public CommandProcessor { std::unique_ptr view_heap_pool_ = nullptr; std::unique_ptr sampler_heap_pool_ = nullptr; + // Unsubmitted barrier batch. + std::vector barriers_; + struct BufferForDeletion { ID3D12Resource* buffer; uint64_t last_usage_frame; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index ad1f0010d..4e7add734 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -578,9 +578,6 @@ bool RenderTargetCache::UpdateRenderTargets() { auto device = command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); - D3D12_RESOURCE_BARRIER barriers[5]; - uint32_t barrier_count = 0; - // Allocate new render targets and add them to the bindings list. for (uint32_t i = 0; i < 5; ++i) { if (!(render_targets_to_attach & (1 << i))) { @@ -636,14 +633,8 @@ bool RenderTargetCache::UpdateRenderTargets() { heap_usage[heap_page_first >> 3] += heap_page_count; // Inform Direct3D that we're reusing the heap for this render target. - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Aliasing.pResourceBefore = nullptr; - barrier.Aliasing.pResourceAfter = binding.render_target->resource; - } - if (barrier_count != 0) { - command_list->ResourceBarrier(barrier_count, barriers); + command_processor_->PushAliasingBarrier(nullptr, + binding.render_target->resource); } // Load the contents of the new render targets from the EDRAM buffer (will @@ -671,7 +662,6 @@ bool RenderTargetCache::UpdateRenderTargets() { // Transition the render targets to the appropriate state if needed, // compress the list of the render target because null RTV descriptors are // broken in Direct3D 12 and bind the render targets to the command list. - barrier_count = 0; D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4]; uint32_t rtv_count = 0; for (uint32_t i = 0; i < 4; ++i) { @@ -680,17 +670,10 @@ bool RenderTargetCache::UpdateRenderTargets() { if (!binding.is_bound || render_target == nullptr) { continue; } - if (render_target->state != D3D12_RESOURCE_STATE_RENDER_TARGET) { - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = render_target->resource; - barrier.Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = render_target->state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET; - } + command_processor_->PushTransitionBarrier( + render_target->resource, render_target->state, + D3D12_RESOURCE_STATE_RENDER_TARGET); + render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET; rtv_handles[rtv_count] = render_target->handle; current_pipeline_render_targets_[rtv_count].guest_render_target = i; current_pipeline_render_targets_[rtv_count].format = @@ -706,17 +689,10 @@ bool RenderTargetCache::UpdateRenderTargets() { RenderTarget* depth_render_target = depth_binding.render_target; current_pipeline_render_targets_[4].guest_render_target = 4; if (depth_binding.is_bound && depth_render_target != nullptr) { - if (depth_render_target->state != D3D12_RESOURCE_STATE_DEPTH_WRITE) { - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = depth_render_target->resource; - barrier.Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = depth_render_target->state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; - depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE; - } + command_processor_->PushTransitionBarrier( + depth_render_target->resource, depth_render_target->state, + D3D12_RESOURCE_STATE_DEPTH_WRITE); + depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE; dsv_handle = &depth_binding.render_target->handle; current_pipeline_render_targets_[4].format = GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4])); @@ -724,9 +700,7 @@ bool RenderTargetCache::UpdateRenderTargets() { dsv_handle = nullptr; current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN; } - if (barrier_count != 0) { - command_list->ResourceBarrier(barrier_count, barriers); - } + command_processor_->SubmitBarriers(); command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle); } @@ -1282,17 +1256,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { return; } - uint32_t store_bindings[5]; - uint32_t store_binding_count = 0; - - // 6 for 5 render targets + the EDRAM buffer. - D3D12_RESOURCE_BARRIER barriers[6]; - uint32_t barrier_count; - // Extract only the render targets that need to be stored, transition them to // copy sources and calculate copy buffer size. + uint32_t store_bindings[5]; + uint32_t store_binding_count = 0; uint32_t copy_buffer_size = 0; - barrier_count = 0; for (uint32_t i = 0; i < 5; ++i) { const RenderTargetBinding& binding = current_bindings_[i]; RenderTarget* render_target = binding.render_target; @@ -1300,38 +1268,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { binding.edram_dirty_rows < 0) { continue; } - store_bindings[store_binding_count] = i; + store_bindings[store_binding_count++] = i; copy_buffer_size = std::max(copy_buffer_size, render_target->copy_buffer_size); - ++store_binding_count; - if (render_target->state != D3D12_RESOURCE_STATE_COPY_SOURCE) { - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = render_target->resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = render_target->state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE; - } } if (store_binding_count == 0) { return; } - if (edram_buffer_state_ != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - // Also transition the EDRAM buffer to UAV. - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = edram_buffer_; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = edram_buffer_state_; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - } - if (barrier_count != 0) { - command_list->ResourceBarrier(barrier_count, barriers); - } // Allocate descriptors for the buffers. D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; @@ -1349,6 +1292,21 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { return; } + // Transition the render targets that need to be stored to copy sources and + // the EDRAM buffer to a UAV. + for (uint32_t i = 0; i < store_binding_count; ++i) { + RenderTarget* render_target = + current_bindings_[store_bindings[i]].render_target; + command_processor_->PushTransitionBarrier(render_target->resource, + render_target->state, + D3D12_RESOURCE_STATE_COPY_SOURCE); + render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + } + command_processor_->PushTransitionBarrier( + edram_buffer_, edram_buffer_state_, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + // Prepare for storing. auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); @@ -1410,6 +1368,12 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { rt_pitch_tiles *= 2; } + // Transition the copy buffer to copy destination. + command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_COPY_DEST); + copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST; + command_processor_->SubmitBarriers(); + // Copy from the render target planes and set up the layout. D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; location_source.pResource = render_target->resource; @@ -1440,16 +1404,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { } // Transition the copy buffer to SRV. - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[0].Transition.pResource = copy_buffer; - barriers[0].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barriers[0].Transition.StateAfter = - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + command_processor_->PushTransitionBarrier( + copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - command_list->ResourceBarrier(1, barriers); + command_processor_->SubmitBarriers(); // Store the data. command_list->SetComputeRoot32BitConstants( @@ -1459,24 +1418,8 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]); command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1); - // Commit the UAV write and prepare for copying again. - barrier_count = 1; - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[0].UAV.pResource = edram_buffer_; - if (i + 1 < store_binding_count) { - barrier_count = 2; - barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[1].Transition.pResource = copy_buffer; - barriers[1].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[1].Transition.StateBefore = - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST; - } - command_list->ResourceBarrier(barrier_count, barriers); + // Commit the UAV write. + command_processor_->PushUAVBarrier(edram_buffer_); } command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); @@ -1495,45 +1438,6 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( return; } - // 6 for 5 render targets + the EDRAM buffer. - D3D12_RESOURCE_BARRIER barriers[6]; - uint32_t barrier_count; - - // Transition the render targets to copy destinations and calculate copy - // buffer size. - uint32_t copy_buffer_size = 0; - barrier_count = 0; - for (uint32_t i = 0; i < render_target_count; ++i) { - RenderTarget* render_target = render_targets[i]; - copy_buffer_size = - std::max(copy_buffer_size, render_target->copy_buffer_size); - if (render_target->state != D3D12_RESOURCE_STATE_COPY_DEST) { - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = render_target->resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = render_target->state; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - render_target->state = D3D12_RESOURCE_STATE_COPY_DEST; - } - } - if (edram_buffer_state_ != D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) { - // Also transition the EDRAM buffer to SRV. - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = edram_buffer_; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = edram_buffer_state_; - barrier.Transition.StateAfter = - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; - } - if (barrier_count != 0) { - command_list->ResourceBarrier(barrier_count, barriers); - } - // Allocate descriptors for the buffers. D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; @@ -1543,6 +1447,11 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( } // Get the buffer for copying. + uint32_t copy_buffer_size = 0; + for (uint32_t i = 0; i < render_target_count; ++i) { + copy_buffer_size = + std::max(copy_buffer_size, render_targets[i]->copy_buffer_size); + } D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer( @@ -1551,7 +1460,21 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( return; } - // Prepare for loading. + // Transition the render targets to copy destinations and the EDRAM buffer to + // a SRV. + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* render_target = render_targets[i]; + command_processor_->PushTransitionBarrier(render_target->resource, + render_target->state, + D3D12_RESOURCE_STATE_COPY_DEST); + render_target->state = D3D12_RESOURCE_STATE_COPY_DEST; + } + command_processor_->PushTransitionBarrier( + edram_buffer_, edram_buffer_state_, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + + // Set up the bindings. auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); auto descriptor_size_view = provider->GetDescriptorSizeView(); @@ -1583,8 +1506,8 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( // Load each render target. for (uint32_t i = 0; i < render_target_count; ++i) { if (edram_bases[i] >= 2048) { - // Something is wrong with the resolve. - return; + // Something is wrong with the load. + continue; } const RenderTarget* render_target = render_targets[i]; @@ -1595,8 +1518,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( ColorRenderTargetFormat(render_target->key.format))) { edram_pitch_tiles *= 2; } - // Validate the height in case the resolve is somehow too large (shouldn't - // happen though, but who knows what games do). + // Clamp the height if somehow requested a render target that is too large. uint32_t edram_rows = std::min(render_target->key.height_ss_div_16, (2048u - edram_bases[i]) / edram_pitch_tiles); @@ -1605,19 +1527,12 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( } // Transition the copy buffer back to UAV if it's not the first load. - if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[0].Transition.pResource = copy_buffer; - barriers[0].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[0].Transition.StateBefore = copy_buffer_state; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - command_list->ResourceBarrier(1, barriers); - } + command_processor_->PushTransitionBarrier( + copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; // Load the data. + command_processor_->SubmitBarriers(); EDRAMLoadStoreRootConstants root_constants; root_constants.base_pitch_tiles = edram_bases[i] | (edram_pitch_tiles << 11); @@ -1638,21 +1553,14 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]); command_list->Dispatch(edram_pitch_tiles, edram_rows, 1); - // Commit the UAV write and transition the copy buffer to copy source. - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[0].UAV.pResource = copy_buffer; - barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[1].Transition.pResource = copy_buffer; - barriers[1].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + // Commit the UAV write and transition the copy buffer to copy source now. + command_processor_->PushUAVBarrier(copy_buffer); + command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_COPY_SOURCE); copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE; - command_list->ResourceBarrier(2, barriers); // Copy to the render target planes. + command_processor_->SubmitBarriers(); D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; location_source.pResource = copy_buffer; location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index f78943466..118ac9e62 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -17,13 +17,15 @@ #include "xenia/base/math.h" #include "xenia/base/memory.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/d3d12/d3d12_command_processor.h" namespace xe { namespace gpu { namespace d3d12 { -SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context) - : memory_(memory), context_(context) { +SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor, + Memory* memory) + : command_processor_(command_processor), memory_(memory) { page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size())); page_count_ = kBufferSize >> page_size_log2_; uint32_t page_bitmap_length = page_count_ >> 6; @@ -36,7 +38,8 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context) SharedMemory::~SharedMemory() { Shutdown(); } bool SharedMemory::Initialize() { - auto device = context_->GetD3D12Provider()->GetDevice(); + auto context = command_processor_->GetD3D12Context(); + auto device = context->GetD3D12Provider()->GetDevice(); buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; D3D12_RESOURCE_DESC buffer_desc; @@ -80,7 +83,7 @@ bool SharedMemory::Initialize() { protected_pages_.size() * sizeof(uint64_t)); upload_buffer_pool_ = - std::make_unique(context_, 4 * 1024 * 1024); + std::make_unique(context, 4 * 1024 * 1024); memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this); @@ -219,7 +222,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { // current frame anymore if have failed at least once. return false; } - auto provider = context_->GetD3D12Provider(); + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); auto direct_queue = provider->GetDirectQueue(); D3D12_HEAP_DESC heap_desc = {}; @@ -254,8 +257,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { return true; } -bool SharedMemory::RequestRange(uint32_t start, uint32_t length, - ID3D12GraphicsCommandList* command_list) { +bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { if (length == 0) { // Some texture is empty, for example - safe to draw in this case. return true; @@ -267,6 +269,11 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length, } uint32_t last = start + length - 1; + auto command_list = command_processor_->GetCurrentCommandList(); + if (command_list == nullptr) { + return false; + } + #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -284,7 +291,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length, if (upload_ranges_.size() == 0) { return true; } - TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list); + TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST); + command_processor_->SubmitBarriers(); for (auto upload_range : upload_ranges_) { uint32_t upload_range_start = upload_range.first; uint32_t upload_range_length = upload_range.second; @@ -505,33 +513,23 @@ bool SharedMemory::MemoryWriteCallback(uint32_t address) { return true; } -void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state, - ID3D12GraphicsCommandList* command_list) { - if (buffer_state_ == new_state) { - return; - } - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = buffer_; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = buffer_state_; - barrier.Transition.StateAfter = new_state; - command_list->ResourceBarrier(1, &barrier); +void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state) { + command_processor_->PushTransitionBarrier(buffer_, buffer_state_, new_state); buffer_state_ = new_state; } -void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) { +void SharedMemory::UseForReading() { TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER | - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, - command_list); + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); } -void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) { - TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list); +void SharedMemory::UseForWriting() { + TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); D3D12_SHADER_RESOURCE_VIEW_DESC desc; desc.Format = DXGI_FORMAT_R32_TYPELESS; desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; @@ -540,11 +538,12 @@ void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { desc.Buffer.NumElements = kBufferSize >> 2; desc.Buffer.StructureByteStride = 0; desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - context_->GetD3D12Provider()->GetDevice()->CreateShaderResourceView( - buffer_, &desc, handle); + device->CreateShaderResourceView(buffer_, &desc, handle); } void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); D3D12_UNORDERED_ACCESS_VIEW_DESC desc; desc.Format = DXGI_FORMAT_R32_TYPELESS; desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; @@ -553,8 +552,7 @@ void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { desc.Buffer.StructureByteStride = 0; desc.Buffer.CounterOffsetInBytes = 0; desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - context_->GetD3D12Provider()->GetDevice()->CreateUnorderedAccessView( - buffer_, nullptr, &desc, handle); + device->CreateUnorderedAccessView(buffer_, nullptr, &desc, handle); } } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index c119bcd50..23f85dfa9 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -23,12 +23,14 @@ namespace xe { namespace gpu { namespace d3d12 { +class D3D12CommandProcessor; + // Manages memory for unconverted textures, resolve targets, vertex and index // buffers that can be accessed from shaders with Xenon physical addresses, with // system page size granularity. class SharedMemory { public: - SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context); + SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory); ~SharedMemory(); bool Initialize(); @@ -39,8 +41,6 @@ class SharedMemory { } void BeginFrame(); - // Returns true if anything has been written to command_list been done. - // The draw command list is needed for the transition. void EndFrame(); typedef void (*WatchCallback)(void* context, void* data, uint64_t argument); @@ -76,8 +76,7 @@ class SharedMemory { // tiled buffer to copy destination - call this before UseForReading or // UseForWriting. Returns true if the range has been fully updated and is // usable. - bool RequestRange(uint32_t start, uint32_t length, - ID3D12GraphicsCommandList* command_list); + bool RequestRange(uint32_t start, uint32_t length); // Marks the range as containing GPU-generated data (such as resolves), // triggering modification callbacks, making it valid (so pages are not @@ -86,17 +85,17 @@ class SharedMemory { void RangeWrittenByGPU(uint32_t start, uint32_t length); // Makes the buffer usable for vertices, indices and texture untiling. - void UseForReading(ID3D12GraphicsCommandList* command_list); + void UseForReading(); // Makes the buffer usable for texture tiling after a resolve. - void UseForWriting(ID3D12GraphicsCommandList* command_list); + void UseForWriting(); void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle); void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle); private: - Memory* memory_; + D3D12CommandProcessor* command_processor_; - ui::d3d12::D3D12Context* context_; + Memory* memory_; // The 512 MB tiled buffer. static constexpr uint32_t kBufferSizeLog2 = 29; @@ -204,8 +203,7 @@ class SharedMemory { uint32_t request_page_count); std::unique_ptr upload_buffer_pool_ = nullptr; - void TransitionBuffer(D3D12_RESOURCE_STATES new_state, - ID3D12GraphicsCommandList* command_list); + void TransitionBuffer(D3D12_RESOURCE_STATES new_state); }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0099d1c95..55ffa672a 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -296,8 +296,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, } // Transition the textures to the needed usage. - D3D12_RESOURCE_BARRIER barriers[32]; - uint32_t barrier_count = 0; used_texture_mask = used_vertex_texture_mask | used_pixel_texture_mask; while (xe::bit_scan_forward(used_texture_mask, &index)) { uint32_t index_bit = 1u << index; @@ -313,20 +311,9 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, if (used_pixel_texture_mask & index_bit) { state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; } - if (texture->state != state) { - D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count]; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = texture->resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = texture->state; - barrier.Transition.StateAfter = state; - ++barrier_count; - texture->state = state; - } - } - if (barrier_count != 0) { - command_list->ResourceBarrier(barrier_count, barriers); + command_processor_->PushTransitionBarrier(texture->resource, texture->state, + state); + texture->state = state; } } @@ -746,13 +733,13 @@ bool TextureCache::LoadTextureData(Texture* texture) { // Request uploading of the texture data to the shared memory. if (!base_in_sync) { if (!shared_memory_->RequestRange(texture->key.base_page << 12, - texture->base_size, command_list)) { + texture->base_size)) { return false; } } if (!mips_in_sync) { if (!shared_memory_->RequestRange(texture->key.mip_page << 12, - texture->mip_size, command_list)) { + texture->mip_size)) { return false; } } @@ -791,7 +778,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; } - shared_memory_->UseForReading(command_list); + shared_memory_->UseForReading(); shared_memory_->CreateSRV(descriptor_cpu_start); D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc; uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; @@ -811,19 +798,9 @@ bool TextureCache::LoadTextureData(Texture* texture) { command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); // Submit commands. - D3D12_RESOURCE_BARRIER barriers[2]; - barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) { - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Transition.pResource = texture->resource; - barriers[0].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[0].Transition.StateBefore = texture->state; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - command_list->ResourceBarrier(1, barriers); - texture->state = D3D12_RESOURCE_STATE_COPY_DEST; - } + command_processor_->PushTransitionBarrier(texture->resource, texture->state, + D3D12_RESOURCE_STATE_COPY_DEST); + texture->state = D3D12_RESOURCE_STATE_COPY_DEST; uint32_t mip_first = base_in_sync ? 1 : 0; uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1; auto cbuffer_pool = command_processor_->GetConstantBufferPool(); @@ -836,16 +813,9 @@ bool TextureCache::LoadTextureData(Texture* texture) { copy_constants.guest_mip_offset[2] = 0; } for (uint32_t i = 0; i < slice_count; ++i) { - if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[0].Transition.pResource = copy_buffer; - barriers[0].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[0].Transition.StateBefore = copy_buffer_state; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - command_list->ResourceBarrier(1, barriers); - copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - } + command_processor_->PushTransitionBarrier( + copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; for (uint32_t j = mip_first; j <= mip_last; ++j) { if (j == 0) { copy_constants.guest_base = @@ -885,21 +855,17 @@ bool TextureCache::LoadTextureData(Texture* texture) { } std::memcpy(cbuffer_mapping, ©_constants, sizeof(copy_constants)); command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address); + command_processor_->SubmitBarriers(); // Each thread group processes 32x32x1 blocks. command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5, (copy_constants.size_blocks[1] + 31) >> 5, copy_constants.size_blocks[2]); } - barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - barriers[0].UAV.pResource = copy_buffer; - barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barriers[1].Transition.pResource = copy_buffer; - barriers[1].Transition.Subresource = - D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; - command_list->ResourceBarrier(2, barriers); + command_processor_->PushUAVBarrier(copy_buffer); + command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, + D3D12_RESOURCE_STATE_COPY_SOURCE); copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE; + command_processor_->SubmitBarriers(); UINT slice_first_subresource = i * resource_desc.MipLevels; for (uint32_t j = mip_first; j <= mip_last; ++j) { D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;