From ba7b6d60817fbc3ee64d2c624498e568664e36cf Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 25 Jan 2019 16:33:25 +0300 Subject: [PATCH] [D3D12] Memexport CPU readback, disabled by default --- .../gpu/d3d12/d3d12_command_processor.cc | 86 ++++++++++++++++++- src/xenia/gpu/d3d12/d3d12_command_processor.h | 8 ++ src/xenia/ui/d3d12/d3d12_util.cc | 2 + src/xenia/ui/d3d12/d3d12_util.h | 1 + 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 456f93a1e..429680790 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -32,6 +32,12 @@ DEFINE_bool(d3d12_edram_rov, true, // disable half-pixel offset by setting this to false. DEFINE_bool(d3d12_half_pixel_offset, true, "Enable half-pixel vertex and VPOS offset."); +DEFINE_bool(d3d12_memexport_readback, false, + "Read data written by memory export in shaders on the CPU. This " + "may be needed in some games (but many only access exported data " + "on the GPU, and this flag isn't needed to handle such behavior), " + "but causes mid-frame synchronization, so it has a huge " + "performance impact."); DEFINE_bool(d3d12_ssaa_custom_sample_positions, false, "Enable custom SSAA sample positions for the RTV/DSV rendering " "path where available instead of centers (experimental, not very " @@ -827,6 +833,9 @@ void D3D12CommandProcessor::ShutdownContext() { auto context = GetD3D12Context(); context->AwaitAllFramesCompletion(); + ui::d3d12::util::ReleaseAndNull(readback_buffer_); + readback_buffer_size_ = 0; + ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; @@ -1121,7 +1130,8 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { - auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); 
+ auto context = GetD3D12Context(); + auto device = context->GetD3D12Provider()->GetDevice(); auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES @@ -1578,10 +1588,55 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, memexport_range.base_address_dwords << 2, memexport_range.size_dwords << 2); } + if (FLAGS_d3d12_memexport_readback) { + // Read the exported data on the CPU. + uint32_t memexport_total_size = 0; + for (uint32_t i = 0; i < memexport_range_count; ++i) { + memexport_total_size += memexport_ranges[i].size_dwords << 2; + } + if (memexport_total_size != 0) { + ID3D12Resource* readback_buffer = + RequestReadbackBuffer(memexport_total_size); + if (readback_buffer != nullptr) { + shared_memory_->UseAsCopySource(); + SubmitBarriers(); + ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer(); + uint32_t readback_buffer_offset = 0; + for (uint32_t i = 0; i < memexport_range_count; ++i) { + const MemExportRange& memexport_range = memexport_ranges[i]; + uint32_t memexport_range_size = memexport_range.size_dwords << 2; + deferred_command_list_->D3DCopyBufferRegion( + readback_buffer, readback_buffer_offset, shared_memory_buffer, + memexport_range.base_address_dwords << 2, memexport_range_size); + readback_buffer_offset += memexport_range_size; + } + EndFrame(); + context->AwaitAllFramesCompletion(); + D3D12_RANGE readback_range; + readback_range.Begin = 0; + readback_range.End = memexport_total_size; + void* readback_mapping; + if (SUCCEEDED(readback_buffer->Map(0, &readback_range, + &readback_mapping))) { + const uint32_t* readback_dwords = + reinterpret_cast<const uint32_t*>(readback_mapping); + // The ranges were copied into the readback buffer back to back in + // this same order, so consume it sequentially, one range at a time. + for (uint32_t i = 0; i < memexport_range_count; ++i) { + const MemExportRange& memexport_range = memexport_ranges[i]; + std::memcpy(memory_->TranslatePhysical( + memexport_range.base_address_dwords << 2), + readback_dwords, memexport_range.size_dwords << 2); + readback_dwords += memexport_range.size_dwords; + } + D3D12_RANGE readback_write_range = 
{}; + readback_buffer->Unmap(0, &readback_write_range); + } + } + } + } } - // TODO(Triang3l): Read back memexported data if the respective gflag is set. - return true; } @@ -3057,6 +3110,36 @@ uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize( return 0; } +ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { + if (size == 0) { + return nullptr; + } + size = xe::align(size, kReadbackBufferSizeIncrement); + if (size > readback_buffer_size_) { + auto context = GetD3D12Context(); + auto device = context->GetD3D12Provider()->GetDevice(); + D3D12_RESOURCE_DESC buffer_desc; + ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size, + D3D12_RESOURCE_FLAG_NONE); + ID3D12Resource* buffer; + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&buffer)))) { + XELOGE("Failed to create a %u MB readback buffer", size >> 20); + return nullptr; + } + if (readback_buffer_ != nullptr) { + readback_buffer_->Release(); + } + readback_buffer_ = buffer; + // Record the new capacity so later requests that fit reuse this buffer + // instead of releasing and recreating it on every call. + readback_buffer_size_ = size; + } + return readback_buffer_; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 8037626b3..3d6fb6c05 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -225,6 +225,10 @@ class D3D12CommandProcessor : public CommandProcessor { // 32 bits per element. static uint32_t GetSupportedMemExportFormatSize(ColorFormat format); + // Returns a buffer for reading GPU data back to the CPU. Assuming + // synchronizing immediately after use. Always in COPY_DEST state. 
+ ID3D12Resource* RequestReadbackBuffer(uint32_t size); + bool cache_clear_requested_ = false; std::unique_ptr @@ -281,6 +285,10 @@ class D3D12CommandProcessor : public CommandProcessor { D3D12_RESOURCE_STATES scratch_buffer_state_; bool scratch_buffer_used_ = false; + static constexpr uint32_t kReadbackBufferSizeIncrement = 16 * 1024 * 1024; + ID3D12Resource* readback_buffer_ = nullptr; + uint32_t readback_buffer_size_ = 0; + uint32_t current_queue_frame_ = UINT32_MAX; std::atomic pix_capture_requested_ = false; diff --git a/src/xenia/ui/d3d12/d3d12_util.cc b/src/xenia/ui/d3d12/d3d12_util.cc index 923301f86..5bbd566f2 100644 --- a/src/xenia/ui/d3d12/d3d12_util.cc +++ b/src/xenia/ui/d3d12/d3d12_util.cc @@ -19,6 +19,8 @@ namespace util { const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault = {D3D12_HEAP_TYPE_DEFAULT}; const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload = {D3D12_HEAP_TYPE_UPLOAD}; +const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback = { + D3D12_HEAP_TYPE_READBACK}; ID3D12RootSignature* CreateRootSignature( D3D12Provider* provider, const D3D12_ROOT_SIGNATURE_DESC& desc) { diff --git a/src/xenia/ui/d3d12/d3d12_util.h b/src/xenia/ui/d3d12/d3d12_util.h index 926498e64..947ab9c44 100644 --- a/src/xenia/ui/d3d12/d3d12_util.h +++ b/src/xenia/ui/d3d12/d3d12_util.h @@ -19,6 +19,7 @@ namespace util { extern const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault; extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload; +extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback; template inline bool ReleaseAndNull(T& object) {