[D3D12] SHM - mark pages as modified in the beginning of a frame

This commit is contained in:
Triang3l 2018-07-25 15:04:14 +03:00
parent ae1196f29d
commit bbabbc2439
3 changed files with 85 additions and 14 deletions

View File

@ -51,7 +51,7 @@ bool D3D12CommandProcessor::SetupContext() {
}
}
shared_memory_ = std::make_unique<SharedMemory>(context);
shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
if (!shared_memory_->Initialize()) {
XELOGE("Failed to initialize shared memory");
return false;
@ -164,6 +164,8 @@ bool D3D12CommandProcessor::BeginFrame() {
command_lists_setup_[current_queue_frame_]->BeginRecording();
command_lists_[current_queue_frame_]->BeginRecording();
shared_memory_->BeginFrame();
return true;
}

View File

@ -19,10 +19,17 @@ namespace xe {
namespace gpu {
namespace d3d12 {
SharedMemory::SharedMemory(ui::d3d12::D3D12Context* context)
: context_(context) {
// Stores the memory and D3D12 context pointers and sizes the page tracking
// bit vectors. Page granularity comes from xe::memory::page_size(), and every
// bit vector packs one bit per page into 64-bit words.
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
: memory_(memory), context_(context) {
page_size_log2_ = xe::math::log2_ceil(xe::memory::page_size());
// NOTE(review): this resize is repeated below with the same length through
// page_bitmap_length - it looks like the pre-change line of the diff; confirm
// and drop it.
pages_in_sync_.resize(kBufferSize >> page_size_log2_ >> 6);
page_count_ = kBufferSize >> page_size_log2_;
// 64 pages per uint64_t word.
uint32_t page_bitmap_length = page_count_ >> 6;
pages_in_sync_.resize(page_bitmap_length);
watched_pages_.resize(page_bitmap_length);
watches_triggered_l1_.resize(page_bitmap_length);
// One L2 bit per L1 word, so one L2 word covers 64 L1 words.
// NOTE(review): this becomes zero-length if page_bitmap_length < 64 (very
// large system pages) - confirm the supported page sizes.
watches_triggered_l2_.resize(page_bitmap_length >> 6);
}
// Calls Shutdown() on destruction.
SharedMemory::~SharedMemory() { Shutdown(); }
@ -51,10 +58,16 @@ bool SharedMemory::Initialize() {
}
std::memset(heaps_, 0, sizeof(heaps_));
heap_creation_failed_ = false;
// Reset the page state bit vectors: nothing is in sync, nothing is watched and
// no watches have been triggered. Only the L2 level of the triggered watches is
// zeroed here.
// NOTE(review): presumably L1 words are only read when their L2 bit is set, so
// they don't need clearing - confirm against the watch callback.
std::memset(pages_in_sync_.data(), 0,
pages_in_sync_.size() * sizeof(uint64_t));
std::memset(watched_pages_.data(), 0,
watched_pages_.size() * sizeof(uint64_t));
std::memset(watches_triggered_l2_.data(), 0,
watches_triggered_l2_.size() * sizeof(uint64_t));
return true;
}
@ -73,7 +86,25 @@ void SharedMemory::Shutdown() {
}
}
bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
void SharedMemory::BeginFrame() {
// Check triggered watches, clear them and mark modified pages as out of date.
watch_mutex_.lock();
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
uint64_t bits_l2 = watches_triggered_l2_[i];
uint32_t index_l2;
while (xe::bit_scan_forward(bits_l2, &index_l2)) {
bits_l2 &= ~(1ull << index_l2);
uint32_t index_l1 = (i << 6) + index_l2;
pages_in_sync_[index_l1] &= ~(watches_triggered_l1[index_l1]);
}
watches_triggered_l2_[i] = 0;
}
watch_mutex_.unlock();
heap_creation_failed_ = false;
}
bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
if (length == 0) {
// Some texture is empty, for example - safe to draw in this case.
return true;
@ -83,14 +114,19 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
// Exceeds the physical address space.
return false;
}
// Ensure all tile heaps are present.
uint32_t heap_first = start >> kHeapSizeLog2;
uint32_t heap_last = (start + length - 1) >> kHeapSizeLog2;
for (uint32_t i = heap_first; i <= heap_last; ++i) {
if (heaps_[i] != nullptr) {
continue;
}
// TODO(Triang3l): If heap creation has failed at least once in this frame,
// don't try to allocate heaps until the next frame.
if (heap_creation_failed_) {
// Don't try to create a heap for every vertex buffer or texture in the
// current frame anymore if creation has already failed at least once.
return false;
}
auto provider = context_->GetD3D12Provider();
auto device = provider->GetDevice();
auto direct_queue = provider->GetDirectQueue();
@ -98,6 +134,7 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
heap_desc.SizeInBytes = kHeapSize;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) {
heap_creation_failed_ = true;
return false;
}
D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
@ -111,11 +148,16 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
D3D12_TILE_RANGE_FLAGS range_flags = D3D12_TILE_RANGE_FLAG_NONE;
UINT heap_range_start_offset = 0;
UINT range_tile_count = kHeapSize >> kTileSizeLog2;
// FIXME(Triang3l): This may cause issues if the emulator is shut down
// mid-frame and the heaps are destroyed before tile mappings are updated
// (AwaitAllFramesCompletion won't catch this then). Defer this until the
// actual command list submission at the end of the frame.
direct_queue->UpdateTileMappings(
buffer_, 1, &region_start_coordinates, &region_size, heaps_[i], 1,
&range_flags, &heap_range_start_offset, &range_tile_count,
D3D12_TILE_MAPPING_FLAG_NONE);
}
// TODO(Triang3l): Mark the range for upload.
return true;
}

View File

@ -10,6 +10,9 @@
#ifndef XENIA_GPU_D3D12_SHARED_MEMORY_H_
#define XENIA_GPU_D3D12_SHARED_MEMORY_H_
#include <mutex>
#include "xenia/memory.h"
#include "xenia/ui/d3d12/d3d12_api.h"
#include "xenia/ui/d3d12/d3d12_context.h"
@ -19,22 +22,28 @@ namespace d3d12 {
// Manages memory for unconverted textures, resolve targets, vertex and index
// buffers that can be accessed from shaders with Xenon physical addresses, with
// 4 KB granularity.
// system page size granularity.
class SharedMemory {
public:
SharedMemory(ui::d3d12::D3D12Context* context);
SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context);
~SharedMemory();
bool Initialize();
void Shutdown();
// Ensures the backing memory for the address range is present in the tiled
// buffer, allocating if needed. If couldn't allocate, false is returned -
// it's unsafe to use this portion (on tiled resources tier 1 at least).
bool EnsureRangeAllocated(uint32_t start, uint32_t length);
void BeginFrame();
// Marks the range as used in this frame, queues it for upload if it was
// modified. Ensures the backing memory for the address range is present in
// the tiled buffer, allocating if needed. If couldn't allocate, false is
// returned - it's unsafe to use this portion (on tiled resources tier 1 at
// least).
bool UseRange(uint32_t start, uint32_t length);
private:
ui::d3d12::D3D12Context* context_ = nullptr;
Memory* memory_;
ui::d3d12::D3D12Context* context_;
// The 512 MB tiled buffer.
static constexpr uint32_t kBufferSizeLog2 = 29;
@ -51,11 +60,29 @@ class SharedMemory {
static constexpr uint32_t kHeapSize = 1 << kHeapSizeLog2;
// Resident portions of the tiled buffer.
ID3D12Heap* heaps_[kBufferSize >> kHeapSizeLog2] = {};
// Whether creation of a heap has failed in the current frame.
bool heap_creation_failed_ = false;
// Log2 of system page size.
uint32_t page_size_log2_;
// Total physical page count.
uint32_t page_count_;
// Bit vector containing whether physical memory system pages are up to date.
std::vector<uint64_t> pages_in_sync_;
// Watched page management - must be synchronized.
std::mutex watch_mutex_;
// Whether each physical page is watched by the GPU (after uploading).
// Once a watch is triggered, it's not watched anymore.
std::vector<uint64_t> watched_pages_;
// Whether each page was modified while the current frame is being processed.
// This is checked and cleared in the beginning of a GPU frame.
// Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
// so unmodified pages can be skipped quickly, and clearing is also fast.
// On L1, each bit corresponds to a single page, on L2, to 64 pages.
std::vector<uint64_t> watches_triggered_l1_;
std::vector<uint64_t> watches_triggered_l2_;
};
} // namespace d3d12