[D3D12] SHM uploading

This commit is contained in:
Triang3l 2018-07-25 20:24:39 +03:00
parent bbabbc2439
commit 2be5f2cfa2
3 changed files with 284 additions and 13 deletions

View File

@ -174,9 +174,18 @@ bool D3D12CommandProcessor::EndFrame() {
return false;
}
// TODO(Triang3l): Don't execute the setup command list if it's empty.
command_lists_setup_[current_queue_frame_]->Execute();
command_lists_[current_queue_frame_]->Execute();
auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
auto command_list = command_lists_[current_queue_frame_].get();
bool setup_written = shared_memory_->EndFrame(
command_list_setup->GetCommandList(), command_list->GetCommandList());
if (setup_written) {
command_list_setup->Execute();
} else {
command_list_setup->AbortRecording();
}
command_list->Execute();
auto context = GetD3D12Context();
context->EndSwap();

View File

@ -9,8 +9,10 @@
#include "xenia/gpu/d3d12/shared_memory.h"
#include <algorithm>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
@ -21,15 +23,19 @@ namespace d3d12 {
// Stores the guest memory and the D3D12 context and sizes all page-tracking
// bit vectors for the shared memory buffer.
//
// Every bit vector stores 64 pages per uint64_t. The second level of the
// triggered watch bitmap stores one bit per first-level uint64_t, so it is 64
// times shorter than the first level.
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
    : memory_(memory), context_(context) {
  page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
  page_count_ = kBufferSize >> page_size_log2_;
  uint32_t page_bitmap_length = page_count_ >> 6;
  uint32_t page_bitmap_l2_length = page_bitmap_length >> 6;
  // The buffer must span at least 64 * 64 pages, otherwise the second level
  // would be empty.
  assert_true(page_bitmap_l2_length > 0);

  pages_in_sync_.resize(page_bitmap_length);
  watched_pages_.resize(page_bitmap_length);
  watches_triggered_l1_.resize(page_bitmap_length);
  watches_triggered_l2_.resize(page_bitmap_l2_length);
  upload_pages_.resize(page_bitmap_length);
}
// Tears the object down by delegating to Shutdown().
SharedMemory::~SharedMemory() {
  this->Shutdown();
}
@ -61,17 +67,35 @@ bool SharedMemory::Initialize() {
heap_creation_failed_ = false;
std::memset(pages_in_sync_.data(), 0,
page_in_sync_.size() * sizeof(uint64_t));
pages_in_sync_.size() * sizeof(uint64_t));
std::memset(watched_pages_.data(), 0,
watched_pages_.size() * sizeof(uint64_t));
std::memset(watches_triggered_l2_.data(), 0,
watches_triggered_l2_.size() * sizeof(uint64_t));
std::memset(upload_pages_.data(), 0, upload_pages_.size() * sizeof(uint64_t));
upload_buffer_available_first_ = nullptr;
upload_buffer_submitted_first_ = nullptr;
upload_buffer_submitted_last_ = nullptr;
return true;
}
void SharedMemory::Shutdown() {
while (upload_buffer_available_first_ != nullptr) {
auto upload_buffer_next = upload_buffer_available_first_->next;
upload_buffer_available_first_->buffer->Release();
delete upload_buffer_available_first_;
upload_buffer_available_first_ = upload_buffer_next;
}
while (upload_buffer_submitted_first_ != nullptr) {
auto upload_buffer_next = upload_buffer_submitted_first_->next;
upload_buffer_submitted_first_->buffer->Release();
delete upload_buffer_submitted_first_;
upload_buffer_submitted_first_ = upload_buffer_next;
}
// First free the buffer to detach it from the heaps.
if (buffer_ != nullptr) {
buffer_->Release();
@ -91,19 +115,201 @@ void SharedMemory::BeginFrame() {
watch_mutex_.lock();
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
uint64_t bits_l2 = watches_triggered_l2_[i];
uint32_t index_l2;
while (xe::bit_scan_forward(bits_l2, &index_l2)) {
bits_l2 &= ~(1ull << index_l2);
uint32_t index_l1 = (i << 6) + index_l2;
pages_in_sync_[index_l1] &= ~(watches_triggered_l1[index_l1]);
uint32_t index_l1_local;
while (xe::bit_scan_forward(bits_l2, &index_l1_local)) {
bits_l2 &= ~(1ull << index_l1_local);
uint32_t index_l1_global = (i << 6) + index_l1_local;
pages_in_sync_[index_l1_global] &=
~(watches_triggered_l1_[index_l1_global]);
}
watches_triggered_l2_[i] = 0;
}
watch_mutex_.unlock();
// Make processed upload buffers available.
uint64_t last_completed_frame = context_->GetLastCompletedFrame();
while (upload_buffer_submitted_first_ != nullptr) {
auto upload_buffer = upload_buffer_submitted_first_;
if (upload_buffer->submit_frame > last_completed_frame) {
break;
}
upload_buffer_submitted_first_ = upload_buffer->next;
upload_buffer->next = upload_buffer_available_first_;
upload_buffer_available_first_ = upload_buffer;
}
if (upload_buffer_submitted_first_ == nullptr) {
upload_buffer_submitted_last_ = nullptr;
}
heap_creation_failed_ = false;
}
// Uploads all pages queued in upload_pages_ to the shared memory buffer and
// moves the upload buffers used for that to the submitted list.
//
// command_list_setup receives the copy commands. The command processor
// executes the setup list before the draw list, so the copies are done by the
// time the draws of this frame read the buffer. command_list_draw only
// receives the transition of the buffer back to the copy destination state.
//
// Returns true if at least one copy has been recorded to command_list_setup;
// if false, nothing was written to the setup list by this function.
bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
                            ID3D12GraphicsCommandList* command_list_draw) {
  // Before drawing starts, it's assumed that the buffer is a copy destination.
  // This transition is for the next frame, not for the current one.
  TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);

  auto current_frame = context_->GetCurrentFrame();
  auto device = context_->GetD3D12Provider()->GetDevice();

  // Detaches the first available upload buffer and appends it to the end of
  // the submitted list, tagged with the current frame so BeginFrame can tell
  // when it may be reused.
  auto submit_first_available_buffer = [this, current_frame]() {
    auto upload_buffer = upload_buffer_available_first_;
    upload_buffer_available_first_ = upload_buffer->next;
    upload_buffer->next = nullptr;
    upload_buffer->submit_frame = current_frame;
    if (upload_buffer_submitted_last_ != nullptr) {
      upload_buffer_submitted_last_->next = upload_buffer;
    } else {
      upload_buffer_submitted_first_ = upload_buffer;
    }
    upload_buffer_submitted_last_ = upload_buffer;
  };

  // Write ranges to upload buffers and submit them.
  // All counters below are in pages, not in bytes.
  const uint32_t upload_buffer_capacity = kUploadBufferSize >> page_size_log2_;
  assert_true(upload_buffer_capacity > 0);
  // First page not uploaded yet - everything before it has been copied.
  uint32_t upload_end = 0;
  // Non-null while the first available upload buffer is mapped.
  void* upload_buffer_mapping = nullptr;
  uint32_t upload_buffer_written = 0;
  uint32_t upload_range_start = 0, upload_range_length = 0;
  while ((upload_range_start =
              NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
    // A range may be larger than what's left in the current upload buffer, so
    // it's written in as many portions as needed.
    while (upload_range_length > 0) {
      if (upload_buffer_mapping == nullptr) {
        // Create a completely new upload buffer if the available pool is empty.
        if (upload_buffer_available_first_ == nullptr) {
          D3D12_HEAP_PROPERTIES upload_buffer_heap_properties = {};
          upload_buffer_heap_properties.Type = D3D12_HEAP_TYPE_UPLOAD;
          D3D12_RESOURCE_DESC upload_buffer_desc;
          upload_buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
          upload_buffer_desc.Alignment = 0;
          upload_buffer_desc.Width = kUploadBufferSize;
          upload_buffer_desc.Height = 1;
          upload_buffer_desc.DepthOrArraySize = 1;
          upload_buffer_desc.MipLevels = 1;
          upload_buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
          upload_buffer_desc.SampleDesc.Count = 1;
          upload_buffer_desc.SampleDesc.Quality = 0;
          upload_buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
          upload_buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
          ID3D12Resource* upload_buffer_resource;
          if (FAILED(device->CreateCommittedResource(
                  &upload_buffer_heap_properties, D3D12_HEAP_FLAG_NONE,
                  &upload_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
                  nullptr, IID_PPV_ARGS(&upload_buffer_resource)))) {
            XELOGE("Failed to create a shared memory upload buffer");
            break;
          }
          upload_buffer_available_first_ = new UploadBuffer;
          upload_buffer_available_first_->buffer = upload_buffer_resource;
          upload_buffer_available_first_->next = nullptr;
        }
        // New buffer, need to map it.
        // The empty read range tells the driver the CPU won't read from it.
        D3D12_RANGE upload_buffer_read_range;
        upload_buffer_read_range.Begin = 0;
        upload_buffer_read_range.End = 0;
        if (FAILED(upload_buffer_available_first_->buffer->Map(
                0, &upload_buffer_read_range, &upload_buffer_mapping))) {
          XELOGE("Failed to map a shared memory upload buffer");
          break;
        }
      }

      // Upload the portion we can upload.
      uint32_t upload_write_length = std::min(
          upload_range_length, upload_buffer_capacity - upload_buffer_written);
      std::memcpy(
          reinterpret_cast<uint8_t*>(upload_buffer_mapping) +
              (upload_buffer_written << page_size_log2_),
          memory_->TranslatePhysical(upload_range_start << page_size_log2_),
          upload_write_length << page_size_log2_);
      // The copy goes to the setup list so it's executed before the draws of
      // this frame rather than after them.
      command_list_setup->CopyBufferRegion(
          buffer_, upload_range_start << page_size_log2_,
          upload_buffer_available_first_->buffer,
          upload_buffer_written << page_size_log2_,
          upload_write_length << page_size_log2_);
      upload_buffer_written += upload_write_length;
      upload_range_start += upload_write_length;
      upload_range_length -= upload_write_length;
      upload_end = upload_range_start;

      // Check if we are done with this buffer.
      if (upload_buffer_written == upload_buffer_capacity) {
        upload_buffer_available_first_->buffer->Unmap(0, nullptr);
        upload_buffer_mapping = nullptr;
        submit_first_available_buffer();
        upload_buffer_written = 0;
      }
    }
    if (upload_range_length > 0) {
      // Buffer creation or mapping failed.
      break;
    }
  }

  // Mark the last upload buffer as submitted if anything was uploaded from it,
  // also unmap it.
  if (upload_buffer_mapping != nullptr) {
    upload_buffer_available_first_->buffer->Unmap(0, nullptr);
  }
  if (upload_buffer_written > 0) {
    submit_first_available_buffer();
  }

  // Mark the newly uploaded ranges as uploaded.
  // Whole 64-page blocks before upload_end are zeroed, then the bits below
  // upload_end in the block containing it are cleared.
  std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
  if (upload_end < page_count_) {
    upload_pages_[upload_end >> 6] &= ~((1ull << (upload_end & 63)) - 1);
  }

  // If some upload failed, mark the pages not uploaded as out-of-date again
  // because they were marked as up-to-date when used as textures/buffers.
  if (upload_range_start != UINT_MAX) {
    for (uint32_t i = upload_end >> 6; i < upload_pages_.size(); ++i) {
      pages_in_sync_[i] &= ~(upload_pages_[i]);
    }
  }

  return upload_end != 0;
}
// Finds the next run of consecutive pages queued in upload_pages_.
//
// search_start is the index of the first page to consider. If a queued page
// is found at or after it, returns the index of the first page of the run and
// writes the number of pages in the run to length. Returns UINT_MAX, leaving
// length untouched, if nothing is queued from search_start onwards.
uint32_t SharedMemory::NextUploadRange(uint32_t search_start,
                                       uint32_t& length) const {
  uint32_t search_start_block_index = search_start >> 6;
  for (uint32_t i = search_start_block_index; i < upload_pages_.size(); ++i) {
    uint64_t start_block = upload_pages_[i];
    if (i == search_start_block_index) {
      // Exclude already visited pages in the first checked 64-page block.
      start_block &= ~((1ull << (search_start & 63)) - 1);
    }
    uint32_t start_page_local;
    if (!xe::bit_scan_forward(start_block, &start_page_local)) {
      continue;
    }

    // Found the beginning of a range - find the end.
    // The end is the first page at or after the start that is not queued,
    // which is the lowest zero bit, so the blocks are scanned inverted.
    uint32_t start_page = (i << 6) + start_page_local;
    for (uint32_t j = i; j < upload_pages_.size(); ++j) {
      uint64_t end_block = upload_pages_[j];
      if (j == i) {
        // Treat the pages before the start as queued so a gap preceding the
        // range in the same block is not mistaken for its end.
        end_block |= (1ull << start_page_local) - 1;
      }
      uint32_t end_page_local;
      if (xe::bit_scan_forward(~end_block, &end_page_local)) {
        length = ((j << 6) + end_page_local) - start_page;
        return start_page;
      }
    }

    // Every page up to the end of the buffer is queued.
    length = page_count_ - start_page;
    return start_page;
  }
  return UINT_MAX;
}
bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
if (length == 0) {
// Some texture is empty, for example - safe to draw in this case.
@ -138,7 +344,7 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
return false;
}
D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
region_start_coordinates.X = i << kHeapSize;
region_start_coordinates.X = i << kHeapSizeLog2;
region_start_coordinates.Y = 0;
region_start_coordinates.Z = 0;
region_start_coordinates.Subresource = 0;
@ -161,6 +367,32 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
return true;
}
// Records a transition barrier moving the shared memory buffer from its
// currently tracked state to new_state on command_list, and updates the
// tracked state. Nothing is recorded if the buffer is already in new_state.
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
                                    ID3D12GraphicsCommandList* command_list) {
  if (buffer_state_ != new_state) {
    D3D12_RESOURCE_BARRIER transition_barrier;
    transition_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    transition_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    auto& transition = transition_barrier.Transition;
    transition.pResource = buffer_;
    transition.Subresource = 0;
    transition.StateBefore = buffer_state_;
    transition.StateAfter = new_state;
    command_list->ResourceBarrier(1, &transition_barrier);
    buffer_state_ = new_state;
  }
}
// Transitions the buffer to the combined index buffer and non-pixel shader
// resource state on the given command list.
void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) {
  const D3D12_RESOURCE_STATES read_states =
      D3D12_RESOURCE_STATE_INDEX_BUFFER |
      D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
  TransitionBuffer(read_states, command_list);
}
// Transitions the buffer to the unordered access state on the given command
// list.
void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
  const D3D12_RESOURCE_STATES write_state =
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  TransitionBuffer(write_state, command_list);
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -32,6 +32,10 @@ class SharedMemory {
void Shutdown();
void BeginFrame();
// Returns true if anything has been written to command_list_setup.
// The draw command list is needed for the transition.
bool EndFrame(ID3D12GraphicsCommandList* command_list_setup,
ID3D12GraphicsCommandList* command_list_draw);
// Marks the range as used in this frame, queues it for upload if it was
// modified. Ensures the backing memory for the address range is present in
@ -40,6 +44,11 @@ class SharedMemory {
// least).
bool UseRange(uint32_t start, uint32_t length);
// Makes the buffer usable for vertices, indices and texture untiling.
void UseForReading(ID3D12GraphicsCommandList* command_list);
// Makes the buffer usable for texture tiling after a resolve.
void UseForWriting(ID3D12GraphicsCommandList* command_list);
private:
Memory* memory_;
@ -71,7 +80,7 @@ class SharedMemory {
// Bit vector containing whether physical memory system pages are up to date.
std::vector<uint64_t> pages_in_sync_;
// Watched page management - must be synchronized.
// Mutex for the watched pages and the triggered watches.
std::mutex watch_mutex_;
// Whether each physical page is watched by the GPU (after uploading).
// Once a watch is triggered, it's not watched anymore.
@ -81,8 +90,29 @@ class SharedMemory {
// Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
// so unmodified pages can be skipped quickly, and clearing is also fast.
// On L1, each bit corresponds to a single page, on L2, to 64 pages.
// Checking if L2 is non-zero before accessing L1 is REQUIRED since L1 is not
// cleared!
std::vector<uint64_t> watches_triggered_l1_;
std::vector<uint64_t> watches_triggered_l2_;
// Pages that need to be uploaded in this frame (that are used but modified).
std::vector<uint64_t> upload_pages_;
static constexpr uint32_t kUploadBufferSize = 4 * 1024 * 1024;
struct UploadBuffer {
ID3D12Resource* buffer;
// Next free or submitted upload buffer.
UploadBuffer* next;
// When this buffer was submitted (only valid for submitted buffers).
uint64_t submit_frame;
};
// Buffers are moved to available in BeginFrame and to submitted in EndFrame.
UploadBuffer* upload_buffer_submitted_first_ = nullptr;
UploadBuffer* upload_buffer_submitted_last_ = nullptr;
UploadBuffer* upload_buffer_available_first_ = nullptr;
uint32_t NextUploadRange(uint32_t search_start, uint32_t& length) const;
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
ID3D12GraphicsCommandList* command_list);
};
} // namespace d3d12