[D3D12] SHM uploading
This commit is contained in:
parent
bbabbc2439
commit
2be5f2cfa2
|
@ -174,9 +174,18 @@ bool D3D12CommandProcessor::EndFrame() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Don't execute the setup command list if it's empty.
|
||||
command_lists_setup_[current_queue_frame_]->Execute();
|
||||
command_lists_[current_queue_frame_]->Execute();
|
||||
auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
|
||||
auto command_list = command_lists_[current_queue_frame_].get();
|
||||
|
||||
bool setup_written = shared_memory_->EndFrame(
|
||||
command_list_setup->GetCommandList(), command_list->GetCommandList());
|
||||
|
||||
if (setup_written) {
|
||||
command_list_setup->Execute();
|
||||
} else {
|
||||
command_list_setup->AbortRecording();
|
||||
}
|
||||
command_list->Execute();
|
||||
|
||||
auto context = GetD3D12Context();
|
||||
context->EndSwap();
|
||||
|
|
|
@ -9,8 +9,10 @@
|
|||
|
||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
@ -21,15 +23,19 @@ namespace d3d12 {
|
|||
|
||||
// Stores the guest memory and D3D12 context pointers and sizes the page
// tracking bit vectors.
//
// The kBufferSize buffer is tracked at system page granularity with one bit
// per page, packed 64 pages per uint64_t. The second level of the triggered
// watch bitmap holds one bit per first-level 64-page word.
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
    : memory_(memory), context_(context) {
  page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
  page_count_ = kBufferSize >> page_size_log2_;
  uint32_t page_bitmap_length = page_count_ >> 6;
  uint32_t page_bitmap_l2_length = page_bitmap_length >> 6;
  // The two-level watch bitmap needs at least one second-level word.
  assert_true(page_bitmap_l2_length > 0);

  pages_in_sync_.resize(page_bitmap_length);

  watched_pages_.resize(page_bitmap_length);
  watches_triggered_l1_.resize(page_bitmap_length);
  watches_triggered_l2_.resize(page_bitmap_l2_length);

  upload_pages_.resize(page_bitmap_length);
}
|
||||
|
||||
// Tears the object down by calling Shutdown().
SharedMemory::~SharedMemory() {
  Shutdown();
}
|
||||
|
@ -61,17 +67,35 @@ bool SharedMemory::Initialize() {
|
|||
heap_creation_failed_ = false;
|
||||
|
||||
std::memset(pages_in_sync_.data(), 0,
|
||||
page_in_sync_.size() * sizeof(uint64_t));
|
||||
pages_in_sync_.size() * sizeof(uint64_t));
|
||||
|
||||
std::memset(watched_pages_.data(), 0,
|
||||
watched_pages_.size() * sizeof(uint64_t));
|
||||
std::memset(watches_triggered_l2_.data(), 0,
|
||||
watches_triggered_l2_.size() * sizeof(uint64_t));
|
||||
|
||||
std::memset(upload_pages_.data(), 0, upload_pages_.size() * sizeof(uint64_t));
|
||||
upload_buffer_available_first_ = nullptr;
|
||||
upload_buffer_submitted_first_ = nullptr;
|
||||
upload_buffer_submitted_last_ = nullptr;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SharedMemory::Shutdown() {
|
||||
while (upload_buffer_available_first_ != nullptr) {
|
||||
auto upload_buffer_next = upload_buffer_available_first_->next;
|
||||
upload_buffer_available_first_->buffer->Release();
|
||||
delete upload_buffer_available_first_;
|
||||
upload_buffer_available_first_ = upload_buffer_next;
|
||||
}
|
||||
while (upload_buffer_submitted_first_ != nullptr) {
|
||||
auto upload_buffer_next = upload_buffer_submitted_first_->next;
|
||||
upload_buffer_submitted_first_->buffer->Release();
|
||||
delete upload_buffer_submitted_first_;
|
||||
upload_buffer_submitted_first_ = upload_buffer_next;
|
||||
}
|
||||
|
||||
// First free the buffer to detach it from the heaps.
|
||||
if (buffer_ != nullptr) {
|
||||
buffer_->Release();
|
||||
|
@ -91,19 +115,201 @@ void SharedMemory::BeginFrame() {
|
|||
watch_mutex_.lock();
|
||||
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
|
||||
uint64_t bits_l2 = watches_triggered_l2_[i];
|
||||
uint32_t index_l2;
|
||||
while (xe::bit_scan_forward(bits_l2, &index_l2)) {
|
||||
bits_l2 &= ~(1ull << index_l2);
|
||||
uint32_t index_l1 = (i << 6) + index_l2;
|
||||
pages_in_sync_[index_l1] &= ~(watches_triggered_l1[index_l1]);
|
||||
uint32_t index_l1_local;
|
||||
while (xe::bit_scan_forward(bits_l2, &index_l1_local)) {
|
||||
bits_l2 &= ~(1ull << index_l1_local);
|
||||
uint32_t index_l1_global = (i << 6) + index_l1_local;
|
||||
pages_in_sync_[index_l1_global] &=
|
||||
~(watches_triggered_l1_[index_l1_global]);
|
||||
}
|
||||
watches_triggered_l2_[i] = 0;
|
||||
}
|
||||
watch_mutex_.unlock();
|
||||
|
||||
// Make processed upload buffers available.
|
||||
uint64_t last_completed_frame = context_->GetLastCompletedFrame();
|
||||
while (upload_buffer_submitted_first_ != nullptr) {
|
||||
auto upload_buffer = upload_buffer_submitted_first_;
|
||||
if (upload_buffer->submit_frame > last_completed_frame) {
|
||||
break;
|
||||
}
|
||||
upload_buffer_submitted_first_ = upload_buffer->next;
|
||||
upload_buffer->next = upload_buffer_available_first_;
|
||||
upload_buffer_available_first_ = upload_buffer;
|
||||
}
|
||||
if (upload_buffer_submitted_first_ == nullptr) {
|
||||
upload_buffer_submitted_last_ = nullptr;
|
||||
}
|
||||
|
||||
heap_creation_failed_ = false;
|
||||
}
|
||||
|
||||
// Uploads every page queued in upload_pages_ to the shared memory buffer and
// prepares the buffer for the next frame.
//
// Guest memory is staged through pooled upload buffers of kUploadBufferSize
// bytes. The copies into buffer_ are recorded on command_list_setup, which the
// command processor executes before the draw command list, so the data is in
// place before this frame's draws read it. command_list_draw only receives the
// transition back to the copy destination state for the next frame.
//
// Returns true if at least one page was uploaded, that is, if anything was
// recorded on command_list_setup. If creating or mapping an upload buffer
// fails, the pages that could not be uploaded are marked as out of sync again.
bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
                            ID3D12GraphicsCommandList* command_list_draw) {
  // Before drawing starts, it's assumed that the buffer is a copy destination.
  // This transition is for the next frame, not for the current one.
  TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);

  auto current_frame = context_->GetCurrentFrame();
  auto device = context_->GetD3D12Provider()->GetDevice();

  // Moves the first available upload buffer to the end of the submitted list,
  // tagging it with the current frame so BeginFrame can tell when the GPU is
  // done with it.
  auto submit_first_available_buffer = [this, current_frame]() {
    auto upload_buffer = upload_buffer_available_first_;
    upload_buffer_available_first_ = upload_buffer->next;
    upload_buffer->next = nullptr;
    upload_buffer->submit_frame = current_frame;
    if (upload_buffer_submitted_last_ != nullptr) {
      upload_buffer_submitted_last_->next = upload_buffer;
    } else {
      upload_buffer_submitted_first_ = upload_buffer;
    }
    upload_buffer_submitted_last_ = upload_buffer;
  };

  // Write ranges to upload buffers and submit them.
  // All counters below are in pages, not bytes.
  const uint32_t upload_buffer_capacity = kUploadBufferSize >> page_size_log2_;
  assert_true(upload_buffer_capacity > 0);
  uint32_t upload_end = 0;
  void* upload_buffer_mapping = nullptr;
  uint32_t upload_buffer_written = 0;
  uint32_t upload_range_start = 0, upload_range_length = 0;
  while ((upload_range_start =
              NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
    while (upload_range_length > 0) {
      if (upload_buffer_mapping == nullptr) {
        // Create a completely new upload buffer if the available pool is empty.
        if (upload_buffer_available_first_ == nullptr) {
          D3D12_HEAP_PROPERTIES upload_buffer_heap_properties = {};
          upload_buffer_heap_properties.Type = D3D12_HEAP_TYPE_UPLOAD;
          D3D12_RESOURCE_DESC upload_buffer_desc;
          upload_buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
          upload_buffer_desc.Alignment = 0;
          upload_buffer_desc.Width = kUploadBufferSize;
          upload_buffer_desc.Height = 1;
          upload_buffer_desc.DepthOrArraySize = 1;
          upload_buffer_desc.MipLevels = 1;
          upload_buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
          upload_buffer_desc.SampleDesc.Count = 1;
          upload_buffer_desc.SampleDesc.Quality = 0;
          upload_buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
          upload_buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
          ID3D12Resource* upload_buffer_resource;
          if (FAILED(device->CreateCommittedResource(
                  &upload_buffer_heap_properties, D3D12_HEAP_FLAG_NONE,
                  &upload_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
                  nullptr, IID_PPV_ARGS(&upload_buffer_resource)))) {
            XELOGE("Failed to create a shared memory upload buffer");
            break;
          }
          upload_buffer_available_first_ = new UploadBuffer;
          upload_buffer_available_first_->buffer = upload_buffer_resource;
          upload_buffer_available_first_->next = nullptr;
        }
        // New buffer, need to map it.
        // The empty read range tells D3D12 the CPU will not read the buffer.
        D3D12_RANGE upload_buffer_read_range;
        upload_buffer_read_range.Begin = 0;
        upload_buffer_read_range.End = 0;
        if (FAILED(upload_buffer_available_first_->buffer->Map(
                0, &upload_buffer_read_range, &upload_buffer_mapping))) {
          XELOGE("Failed to map a shared memory upload buffer");
          break;
        }
      }

      // Upload the portion we can upload.
      uint32_t upload_write_length = std::min(
          upload_range_length, upload_buffer_capacity - upload_buffer_written);
      std::memcpy(
          reinterpret_cast<uint8_t*>(upload_buffer_mapping) +
              (upload_buffer_written << page_size_log2_),
          memory_->TranslatePhysical(upload_range_start << page_size_log2_),
          upload_write_length << page_size_log2_);
      // The copy goes to the setup list so it is executed before the draws of
      // this frame; the return value tells the caller whether that list has
      // anything to execute.
      command_list_setup->CopyBufferRegion(
          buffer_, upload_range_start << page_size_log2_,
          upload_buffer_available_first_->buffer,
          upload_buffer_written << page_size_log2_,
          upload_write_length << page_size_log2_);
      upload_buffer_written += upload_write_length;
      upload_range_start += upload_write_length;
      upload_range_length -= upload_write_length;
      upload_end = upload_range_start;

      // Check if we are done with this buffer.
      if (upload_buffer_written == upload_buffer_capacity) {
        upload_buffer_available_first_->buffer->Unmap(0, nullptr);
        upload_buffer_mapping = nullptr;
        submit_first_available_buffer();
        upload_buffer_written = 0;
      }
    }
    if (upload_range_length > 0) {
      // Buffer creation or mapping failed.
      break;
    }
  }
  // Mark the last upload buffer as submitted if anything was uploaded from it,
  // also unmap it.
  if (upload_buffer_mapping != nullptr) {
    upload_buffer_available_first_->buffer->Unmap(0, nullptr);
  }
  if (upload_buffer_written > 0) {
    submit_first_available_buffer();
  }

  // Mark the newly uploaded ranges as uploaded.
  // Whole 64-page words below upload_end are cleared first, then the bits
  // below upload_end in the word that contains it.
  std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
  if (upload_end < page_count_) {
    upload_pages_[upload_end >> 6] &= ~((1ull << (upload_end & 63)) - 1);
  }

  // If some upload failed, mark the pages not uploaded as out-of-date again
  // because they were marked as up-to-date when used as textures/buffers.
  if (upload_range_start != UINT_MAX) {
    for (uint32_t i = upload_end >> 6; i < upload_pages_.size(); ++i) {
      pages_in_sync_[i] &= ~(upload_pages_[i]);
    }
  }

  return upload_end != 0;
}
|
||||
|
||||
uint32_t SharedMemory::NextUploadRange(uint32_t search_start,
|
||||
uint32_t& length) const {
|
||||
uint32_t search_start_block_index = search_start >> 6;
|
||||
for (uint32_t i = search_start_block_index; i < upload_pages_.size(); ++i) {
|
||||
uint64_t start_block = upload_pages_[i];
|
||||
if (i == search_start_block_index) {
|
||||
// Exclude already visited pages in the first checked 64-page block.
|
||||
start_block &= ~((1ull << (search_start & 63)) - 1);
|
||||
}
|
||||
uint32_t start_page_local;
|
||||
if (!xe::bit_scan_forward(start_block, &start_page_local)) {
|
||||
continue;
|
||||
}
|
||||
// Found the beginning of a range - find the end.
|
||||
uint32_t start_page = (i << 6) + start_page_local;
|
||||
for (uint32_t j = i; j < upload_pages_.size(); ++j) {
|
||||
uint64_t end_block = upload_pages_[i];
|
||||
if (j == i) {
|
||||
end_block |= ~((1ull << start_page_local) - 1);
|
||||
}
|
||||
uint32_t end_page_local;
|
||||
if (xe::bit_scan_forward(~end_block, &end_page_local)) {
|
||||
length = ((j << 6) + end_page_local) - start_page;
|
||||
return start_page;
|
||||
}
|
||||
}
|
||||
length = page_count_ - start_page;
|
||||
return start_page;
|
||||
}
|
||||
return UINT_MAX;
|
||||
}
|
||||
|
||||
bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
||||
if (length == 0) {
|
||||
// Some texture is empty, for example - safe to draw in this case.
|
||||
|
@ -138,7 +344,7 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
|||
return false;
|
||||
}
|
||||
D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
|
||||
region_start_coordinates.X = i << kHeapSize;
|
||||
region_start_coordinates.X = i << kHeapSizeLog2;
|
||||
region_start_coordinates.Y = 0;
|
||||
region_start_coordinates.Z = 0;
|
||||
region_start_coordinates.Subresource = 0;
|
||||
|
@ -161,6 +367,32 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Records a transition barrier on command_list that moves the shared memory
// buffer from its tracked state to new_state, then updates the tracked state.
// Nothing is recorded when the buffer is already tracked as being in
// new_state.
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
                                    ID3D12GraphicsCommandList* command_list) {
  if (buffer_state_ != new_state) {
    D3D12_RESOURCE_BARRIER transition_barrier;
    transition_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    transition_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    auto& transition = transition_barrier.Transition;
    transition.pResource = buffer_;
    transition.Subresource = 0;
    transition.StateBefore = buffer_state_;
    transition.StateAfter = new_state;
    command_list->ResourceBarrier(1, &transition_barrier);
    buffer_state_ = new_state;
  }
}
|
||||
|
||||
// Transitions the buffer, on command_list, to the combined index buffer and
// non-pixel shader resource state.
void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) {
  const D3D12_RESOURCE_STATES read_states =
      D3D12_RESOURCE_STATE_INDEX_BUFFER |
      D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
  TransitionBuffer(read_states, command_list);
}
|
||||
|
||||
// Transitions the buffer, on command_list, to the unordered access state.
void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
  const D3D12_RESOURCE_STATES write_state =
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  TransitionBuffer(write_state, command_list);
}
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -32,6 +32,10 @@ class SharedMemory {
|
|||
void Shutdown();
|
||||
|
||||
void BeginFrame();
|
||||
// Returns true if any pages were uploaded in this frame; the caller executes
// the setup command list in that case and aborts its recording otherwise.
|
||||
// The draw command list is needed for the transition.
|
||||
bool EndFrame(ID3D12GraphicsCommandList* command_list_setup,
|
||||
ID3D12GraphicsCommandList* command_list_draw);
|
||||
|
||||
// Marks the range as used in this frame, queues it for upload if it was
|
||||
// modified. Ensures the backing memory for the address range is present in
|
||||
|
@ -40,6 +44,11 @@ class SharedMemory {
|
|||
// least).
|
||||
bool UseRange(uint32_t start, uint32_t length);
|
||||
|
||||
// Makes the buffer usable for vertices, indices and texture untiling.
|
||||
void UseForReading(ID3D12GraphicsCommandList* command_list);
|
||||
// Makes the buffer usable for texture tiling after a resolve.
|
||||
void UseForWriting(ID3D12GraphicsCommandList* command_list);
|
||||
|
||||
private:
|
||||
Memory* memory_;
|
||||
|
||||
|
@ -71,7 +80,7 @@ class SharedMemory {
|
|||
// Bit vector containing whether physical memory system pages are up to date.
|
||||
std::vector<uint64_t> pages_in_sync_;
|
||||
|
||||
// Watched page management - must be synchronized.
|
||||
// Mutex for the watched pages and the triggered watches.
|
||||
std::mutex watch_mutex_;
|
||||
// Whether each physical page is watched by the GPU (after uploading).
|
||||
// Once a watch is triggered, it's not watched anymore.
|
||||
|
@ -81,8 +90,29 @@ class SharedMemory {
|
|||
// Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
|
||||
// so unmodified pages can be skipped quickly, and clearing is also fast.
|
||||
// On L1, each bit corresponds to a single page, on L2, to 64 pages.
|
||||
// Checking if L2 is non-zero before accessing L1 is REQUIRED since L1 is not
|
||||
// cleared!
|
||||
std::vector<uint64_t> watches_triggered_l1_;
|
||||
std::vector<uint64_t> watches_triggered_l2_;
|
||||
|
||||
// Pages that need to be uploaded in this frame (that are used but modified).
|
||||
std::vector<uint64_t> upload_pages_;
|
||||
static constexpr uint32_t kUploadBufferSize = 4 * 1024 * 1024;
|
||||
struct UploadBuffer {
|
||||
ID3D12Resource* buffer;
|
||||
// Next free or submitted upload buffer.
|
||||
UploadBuffer* next;
|
||||
// When this buffer was submitted (only valid for submitted buffers).
|
||||
uint64_t submit_frame;
|
||||
};
|
||||
// Buffers are moved to available in BeginFrame and to submitted in EndFrame.
|
||||
UploadBuffer* upload_buffer_submitted_first_ = nullptr;
|
||||
UploadBuffer* upload_buffer_submitted_last_ = nullptr;
|
||||
UploadBuffer* upload_buffer_available_first_ = nullptr;
|
||||
uint32_t NextUploadRange(uint32_t search_start, uint32_t& length) const;
|
||||
|
||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
||||
ID3D12GraphicsCommandList* command_list);
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
Loading…
Reference in New Issue