From 2be5f2cfa2d19e3388f1ec1bf33f4165a5901827 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 25 Jul 2018 20:24:39 +0300
Subject: [PATCH] [D3D12] SHM uploading

---
 .../gpu/d3d12/d3d12_command_processor.cc      |  15 +-
 src/xenia/gpu/d3d12/shared_memory.cc          | 250 +++++++++++++++++-
 src/xenia/gpu/d3d12/shared_memory.h           |  32 ++-
 3 files changed, 284 insertions(+), 13 deletions(-)

diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index 1943507d3..1543f0b07 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -174,9 +174,18 @@ bool D3D12CommandProcessor::EndFrame() {
     return false;
   }
 
-  // TODO(Triang3l): Don't execute the setup command list if it's empty.
-  command_lists_setup_[current_queue_frame_]->Execute();
-  command_lists_[current_queue_frame_]->Execute();
+  auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
+  auto command_list = command_lists_[current_queue_frame_].get();
+
+  bool setup_written = shared_memory_->EndFrame(
+      command_list_setup->GetCommandList(), command_list->GetCommandList());
+
+  if (setup_written) {
+    command_list_setup->Execute();
+  } else {
+    command_list_setup->AbortRecording();
+  }
+  command_list->Execute();
 
   auto context = GetD3D12Context();
   context->EndSwap();
diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index bdc161802..c3f6f7aeb 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -9,8 +9,10 @@
 
 #include "xenia/gpu/d3d12/shared_memory.h"
 
+#include <algorithm>
 #include <cstring>
 
+#include "xenia/base/assert.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/base/memory.h"
@@ -21,15 +23,19 @@ namespace d3d12 {
 
 SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
     : memory_(memory), context_(context) {
-  page_size_log2_ = xe::math::log2_ceil(xe::memory::page_size());
+  page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
   page_count_ = kBufferSize >> page_size_log2_;
   uint32_t page_bitmap_length = page_count_ >> 6;
+  uint32_t page_bitmap_l2_length = page_bitmap_length >> 6;
+  assert_true(page_bitmap_l2_length > 0);
 
   pages_in_sync_.resize(page_bitmap_length);
 
   watched_pages_.resize(page_bitmap_length);
   watches_triggered_l1_.resize(page_bitmap_length);
-  watches_triggered_l2_.resize(page_bitmap_length >> 6);
+  watches_triggered_l2_.resize(page_bitmap_l2_length);
+
+  upload_pages_.resize(page_bitmap_length);
 }
 
 SharedMemory::~SharedMemory() { Shutdown(); }
@@ -61,17 +67,35 @@ bool SharedMemory::Initialize() {
   heap_creation_failed_ = false;
 
   std::memset(pages_in_sync_.data(), 0,
-              page_in_sync_.size() * sizeof(uint64_t));
+              pages_in_sync_.size() * sizeof(uint64_t));
 
   std::memset(watched_pages_.data(), 0,
               watched_pages_.size() * sizeof(uint64_t));
   std::memset(watches_triggered_l2_.data(), 0,
               watches_triggered_l2_.size() * sizeof(uint64_t));
 
+  std::memset(upload_pages_.data(), 0, upload_pages_.size() * sizeof(uint64_t));
+  upload_buffer_available_first_ = nullptr;
+  upload_buffer_submitted_first_ = nullptr;
+  upload_buffer_submitted_last_ = nullptr;
+
   return true;
 }
 
 void SharedMemory::Shutdown() {
+  while (upload_buffer_available_first_ != nullptr) {
+    auto upload_buffer_next = upload_buffer_available_first_->next;
+    upload_buffer_available_first_->buffer->Release();
+    delete upload_buffer_available_first_;
+    upload_buffer_available_first_ = upload_buffer_next;
+  }
+  while (upload_buffer_submitted_first_ != nullptr) {
+    auto upload_buffer_next = upload_buffer_submitted_first_->next;
+    upload_buffer_submitted_first_->buffer->Release();
+    delete upload_buffer_submitted_first_;
+    upload_buffer_submitted_first_ = upload_buffer_next;
+  }
+
   // First free the buffer to detach it from the heaps.
   if (buffer_ != nullptr) {
     buffer_->Release();
@@ -91,19 +115,201 @@ void SharedMemory::BeginFrame() {
   watch_mutex_.lock();
   for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
     uint64_t bits_l2 = watches_triggered_l2_[i];
-    uint32_t index_l2;
-    while (xe::bit_scan_forward(bits_l2, &index_l2)) {
-      bits_l2 &= ~(1ull << index_l2);
-      uint32_t index_l1 = (i << 6) + index_l2;
-      pages_in_sync_[index_l1] &= ~(watches_triggered_l1[index_l1]);
+    uint32_t index_l1_local;
+    while (xe::bit_scan_forward(bits_l2, &index_l1_local)) {
+      bits_l2 &= ~(1ull << index_l1_local);
+      uint32_t index_l1_global = (i << 6) + index_l1_local;
+      pages_in_sync_[index_l1_global] &=
+          ~(watches_triggered_l1_[index_l1_global]);
     }
     watches_triggered_l2_[i] = 0;
   }
   watch_mutex_.unlock();
 
+  // Make processed upload buffers available.
+  uint64_t last_completed_frame = context_->GetLastCompletedFrame();
+  while (upload_buffer_submitted_first_ != nullptr) {
+    auto upload_buffer = upload_buffer_submitted_first_;
+    if (upload_buffer->submit_frame > last_completed_frame) {
+      break;
+    }
+    upload_buffer_submitted_first_ = upload_buffer->next;
+    upload_buffer->next = upload_buffer_available_first_;
+    upload_buffer_available_first_ = upload_buffer;
+  }
+  if (upload_buffer_submitted_first_ == nullptr) {
+    upload_buffer_submitted_last_ = nullptr;
+  }
+
   heap_creation_failed_ = false;
 }
 
+bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
+                            ID3D12GraphicsCommandList* command_list_draw) {
+  // Before drawing starts, it's assumed that the buffer is a copy destination.
+  // This transition is for the next frame, not for the current one.
+  TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);
+
+  auto current_frame = context_->GetCurrentFrame();
+  auto device = context_->GetD3D12Provider()->GetDevice();
+
+  // Write ranges to upload buffers and submit them.
+  const uint32_t upload_buffer_capacity = kUploadBufferSize >> page_size_log2_;
+  assert_true(upload_buffer_capacity > 0);
+  uint32_t upload_end = 0;
+  void* upload_buffer_mapping = nullptr;
+  uint32_t upload_buffer_written = 0;
+  uint32_t upload_range_start = 0, upload_range_length;
+  while ((upload_range_start =
+              NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
+    while (upload_range_length > 0) {
+      if (upload_buffer_mapping == nullptr) {
+        // Create a completely new upload buffer if the available pool is empty.
+        if (upload_buffer_available_first_ == nullptr) {
+          D3D12_HEAP_PROPERTIES upload_buffer_heap_properties = {};
+          upload_buffer_heap_properties.Type = D3D12_HEAP_TYPE_UPLOAD;
+          D3D12_RESOURCE_DESC upload_buffer_desc;
+          upload_buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+          upload_buffer_desc.Alignment = 0;
+          upload_buffer_desc.Width = kUploadBufferSize;
+          upload_buffer_desc.Height = 1;
+          upload_buffer_desc.DepthOrArraySize = 1;
+          upload_buffer_desc.MipLevels = 1;
+          upload_buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+          upload_buffer_desc.SampleDesc.Count = 1;
+          upload_buffer_desc.SampleDesc.Quality = 0;
+          upload_buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+          upload_buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+          ID3D12Resource* upload_buffer_resource;
+          if (FAILED(device->CreateCommittedResource(
+                  &upload_buffer_heap_properties, D3D12_HEAP_FLAG_NONE,
+                  &upload_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
+                  nullptr, IID_PPV_ARGS(&upload_buffer_resource)))) {
+            XELOGE("Failed to create a shared memory upload buffer");
+            break;
+          }
+          upload_buffer_available_first_ = new UploadBuffer;
+          upload_buffer_available_first_->buffer = upload_buffer_resource;
+          upload_buffer_available_first_->next = nullptr;
+        }
+        // New buffer, need to map it.
+        D3D12_RANGE upload_buffer_read_range;
+        upload_buffer_read_range.Begin = 0;
+        upload_buffer_read_range.End = 0;
+        if (FAILED(upload_buffer_available_first_->buffer->Map(
+                0, &upload_buffer_read_range, &upload_buffer_mapping))) {
+          XELOGE("Failed to map a shared memory upload buffer");
+          break;
+        }
+      }
+
+      // Upload the portion we can upload.
+      uint32_t upload_write_length = std::min(
+          upload_range_length, upload_buffer_capacity - upload_buffer_written);
+      std::memcpy(
+          reinterpret_cast<uint8_t*>(upload_buffer_mapping) +
+              (upload_buffer_written << page_size_log2_),
+          memory_->TranslatePhysical(upload_range_start << page_size_log2_),
+          upload_write_length << page_size_log2_);
+      command_list_setup->CopyBufferRegion(
+          buffer_, upload_range_start << page_size_log2_,
+          upload_buffer_available_first_->buffer,
+          upload_buffer_written << page_size_log2_,
+          upload_write_length << page_size_log2_);
+      upload_buffer_written += upload_write_length;
+      upload_range_start += upload_write_length;
+      upload_range_length -= upload_write_length;
+      upload_end = upload_range_start;
+
+      // Check if we are done with this buffer.
+      if (upload_buffer_written == upload_buffer_capacity) {
+        auto upload_buffer = upload_buffer_available_first_;
+        upload_buffer->buffer->Unmap(0, nullptr);
+        upload_buffer_mapping = nullptr;
+        upload_buffer_available_first_ = upload_buffer->next;
+        upload_buffer->next = nullptr;
+        upload_buffer->submit_frame = current_frame;
+        if (upload_buffer_submitted_last_ != nullptr) {
+          upload_buffer_submitted_last_->next = upload_buffer;
+        } else {
+          upload_buffer_submitted_first_ = upload_buffer;
+        }
+        upload_buffer_submitted_last_ = upload_buffer;
+        upload_buffer_written = 0;
+      }
+    }
+    if (upload_range_length > 0) {
+      // Buffer creation or mapping failed.
+      break;
+    }
+  }
+  // Mark the last upload buffer as submitted if anything was uploaded from it,
+  // also unmap it.
+  if (upload_buffer_mapping != nullptr) {
+    upload_buffer_available_first_->buffer->Unmap(0, nullptr);
+  }
+  if (upload_buffer_written > 0) {
+    auto upload_buffer = upload_buffer_available_first_;
+    upload_buffer_available_first_ = upload_buffer->next;
+    upload_buffer->next = nullptr;
+    upload_buffer->submit_frame = current_frame;
+    if (upload_buffer_submitted_last_ != nullptr) {
+      upload_buffer_submitted_last_->next = upload_buffer;
+    } else {
+      upload_buffer_submitted_first_ = upload_buffer;
+    }
+    upload_buffer_submitted_last_ = upload_buffer;
+  }
+
+  // Mark the newly uploaded ranges as uploaded.
+  std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
+  if (upload_end < page_count_) {
+    upload_pages_[upload_end >> 6] &= ~((1ull << (upload_end & 63)) - 1);
+  }
+
+  // If some upload failed, mark the pages not uploaded as out-of-date again
+  // because they were marked as up-to-date when used as textures/buffers.
+  if (upload_range_start != UINT_MAX) {
+    for (uint32_t i = upload_end >> 6; i < upload_pages_.size(); ++i) {
+      pages_in_sync_[i] &= ~(upload_pages_[i]);
+    }
+  }
+
+  return upload_end != 0;
+}
+
+uint32_t SharedMemory::NextUploadRange(uint32_t search_start,
+                                       uint32_t& length) const {
+  uint32_t search_start_block_index = search_start >> 6;
+  for (uint32_t i = search_start_block_index; i < upload_pages_.size(); ++i) {
+    uint64_t start_block = upload_pages_[i];
+    if (i == search_start_block_index) {
+      // Exclude already visited pages in the first checked 64-page block.
+      start_block &= ~((1ull << (search_start & 63)) - 1);
+    }
+    uint32_t start_page_local;
+    if (!xe::bit_scan_forward(start_block, &start_page_local)) {
+      continue;
+    }
+    // Found the beginning of a range - find the end.
+    uint32_t start_page = (i << 6) + start_page_local;
+    for (uint32_t j = i; j < upload_pages_.size(); ++j) {
+      uint64_t end_block = upload_pages_[j];
+      if (j == i) {
+        end_block |= (1ull << start_page_local) - 1;
+      }
+      uint32_t end_page_local;
+      if (xe::bit_scan_forward(~end_block, &end_page_local)) {
+        length = ((j << 6) + end_page_local) - start_page;
+        return start_page;
+      }
+    }
+    length = page_count_ - start_page;
+    return start_page;
+  }
+  return UINT_MAX;
+}
+
 bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
   if (length == 0) {
     // Some texture is empty, for example - safe to draw in this case.
@@ -138,7 +344,7 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
       return false;
     }
     D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
-    region_start_coordinates.X = i << kHeapSize;
+    region_start_coordinates.X = i << kHeapSizeLog2;
     region_start_coordinates.Y = 0;
     region_start_coordinates.Z = 0;
     region_start_coordinates.Subresource = 0;
@@ -161,6 +367,32 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
   return true;
 }
 
+void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
+                                    ID3D12GraphicsCommandList* command_list) {
+  if (buffer_state_ == new_state) {
+    return;
+  }
+  D3D12_RESOURCE_BARRIER barrier;
+  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+  barrier.Transition.pResource = buffer_;
+  barrier.Transition.Subresource = 0;
+  barrier.Transition.StateBefore = buffer_state_;
+  barrier.Transition.StateAfter = new_state;
+  command_list->ResourceBarrier(1, &barrier);
+  buffer_state_ = new_state;
+}
+
+void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) {
+  TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER |
+                       D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
+                   command_list);
+}
+
+void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
+  TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list);
+}
+
 }  // namespace d3d12
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index 89a42185b..b37e6aa5c 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -32,6 +32,10 @@ class SharedMemory {
   void Shutdown();
 
   void BeginFrame();
+  // Returns true if anything has been written to command_list_setup.
+  // The draw command list is needed for the transition.
+  bool EndFrame(ID3D12GraphicsCommandList* command_list_setup,
+                ID3D12GraphicsCommandList* command_list_draw);
 
   // Marks the range as used in this frame, queues it for upload if it was
   // modified. Ensures the backing memory for the address range is present in
@@ -40,6 +44,11 @@ class SharedMemory {
   // least).
   bool UseRange(uint32_t start, uint32_t length);
 
+  // Makes the buffer usable for vertices, indices and texture untiling.
+  void UseForReading(ID3D12GraphicsCommandList* command_list);
+  // Makes the buffer usable for texture tiling after a resolve.
+  void UseForWriting(ID3D12GraphicsCommandList* command_list);
+
  private:
   Memory* memory_;
 
@@ -71,7 +80,7 @@ class SharedMemory {
   // Bit vector containing whether physical memory system pages are up to date.
   std::vector<uint64_t> pages_in_sync_;
 
-  // Watched page management - must be synchronized.
+  // Mutex for the watched pages and the triggered watches.
   std::mutex watch_mutex_;
   // Whether each physical page is watched by the GPU (after uploading).
   // Once a watch is triggered, it's not watched anymore.
@@ -81,8 +90,29 @@ class SharedMemory {
   // Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
   // so unmodified pages can be skipped quickly, and clearing is also fast.
   // On L1, each bit corresponds to a single page, on L2, to 64 pages.
+  // Checking if L2 is non-zero before accessing L1 is REQUIRED since L1 is not
+  // cleared!
   std::vector<uint64_t> watches_triggered_l1_;
   std::vector<uint64_t> watches_triggered_l2_;
+
+  // Pages that need to be uploaded in this frame (that are used but modified).
+  std::vector<uint64_t> upload_pages_;
+  static constexpr uint32_t kUploadBufferSize = 4 * 1024 * 1024;
+  struct UploadBuffer {
+    ID3D12Resource* buffer;
+    // Next free or submitted upload buffer.
+    UploadBuffer* next;
+    // When this buffer was submitted (only valid for submitted buffers).
+    uint64_t submit_frame;
+  };
+  // Buffers are moved to available in BeginFrame and to submitted in EndFrame.
+  UploadBuffer* upload_buffer_submitted_first_ = nullptr;
+  UploadBuffer* upload_buffer_submitted_last_ = nullptr;
+  UploadBuffer* upload_buffer_available_first_ = nullptr;
+  uint32_t NextUploadRange(uint32_t search_start, uint32_t& length) const;
+
+  void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
+                        ID3D12GraphicsCommandList* command_list);
 };
 
 }  // namespace d3d12