[D3D12] SHM - mark pages as modified in the beginning of a frame
This commit is contained in:
parent
ae1196f29d
commit
bbabbc2439
|
@ -51,7 +51,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
}
|
||||
}
|
||||
|
||||
shared_memory_ = std::make_unique<SharedMemory>(context);
|
||||
shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
|
||||
if (!shared_memory_->Initialize()) {
|
||||
XELOGE("Failed to initialize shared memory");
|
||||
return false;
|
||||
|
@ -164,6 +164,8 @@ bool D3D12CommandProcessor::BeginFrame() {
|
|||
command_lists_setup_[current_queue_frame_]->BeginRecording();
|
||||
command_lists_[current_queue_frame_]->BeginRecording();
|
||||
|
||||
shared_memory_->BeginFrame();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,10 +19,17 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace d3d12 {
|
||||
|
||||
SharedMemory::SharedMemory(ui::d3d12::D3D12Context* context)
|
||||
: context_(context) {
|
||||
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
|
||||
: memory_(memory), context_(context) {
|
||||
page_size_log2_ = xe::math::log2_ceil(xe::memory::page_size());
|
||||
pages_in_sync_.resize(kBufferSize >> page_size_log2_ >> 6);
|
||||
page_count_ = kBufferSize >> page_size_log2_;
|
||||
uint32_t page_bitmap_length = page_count_ >> 6;
|
||||
|
||||
pages_in_sync_.resize(page_bitmap_length);
|
||||
|
||||
watched_pages_.resize(page_bitmap_length);
|
||||
watches_triggered_l1_.resize(page_bitmap_length);
|
||||
watches_triggered_l2_.resize(page_bitmap_length >> 6);
|
||||
}
|
||||
|
||||
SharedMemory::~SharedMemory() { Shutdown(); }
|
||||
|
@ -51,10 +58,16 @@ bool SharedMemory::Initialize() {
|
|||
}
|
||||
|
||||
std::memset(heaps_, 0, sizeof(heaps_));
|
||||
heap_creation_failed_ = false;
|
||||
|
||||
std::memset(pages_in_sync_.data(), 0,
|
||||
pages_in_sync_.size() * sizeof(uint64_t));
|
||||
|
||||
std::memset(watched_pages_.data(), 0,
|
||||
watched_pages_.size() * sizeof(uint64_t));
|
||||
std::memset(watches_triggered_l2_.data(), 0,
|
||||
watches_triggered_l2_.size() * sizeof(uint64_t));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -73,7 +86,25 @@ void SharedMemory::Shutdown() {
|
|||
}
|
||||
}
|
||||
|
||||
bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
|
||||
void SharedMemory::BeginFrame() {
|
||||
// Check triggered watches, clear them and mark modified pages as out of date.
|
||||
watch_mutex_.lock();
|
||||
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
|
||||
uint64_t bits_l2 = watches_triggered_l2_[i];
|
||||
uint32_t index_l2;
|
||||
while (xe::bit_scan_forward(bits_l2, &index_l2)) {
|
||||
bits_l2 &= ~(1ull << index_l2);
|
||||
uint32_t index_l1 = (i << 6) + index_l2;
|
||||
pages_in_sync_[index_l1] &= ~(watches_triggered_l1_[index_l1]);
|
||||
}
|
||||
watches_triggered_l2_[i] = 0;
|
||||
}
|
||||
watch_mutex_.unlock();
|
||||
|
||||
heap_creation_failed_ = false;
|
||||
}
|
||||
|
||||
bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
||||
if (length == 0) {
|
||||
// Some texture is empty, for example - safe to draw in this case.
|
||||
return true;
|
||||
|
@ -83,14 +114,19 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
|
|||
// Exceeds the physical address space.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ensure all tile heaps are present.
|
||||
uint32_t heap_first = start >> kHeapSizeLog2;
|
||||
uint32_t heap_last = (start + length - 1) >> kHeapSizeLog2;
|
||||
for (uint32_t i = heap_first; i <= heap_last; ++i) {
|
||||
if (heaps_[i] != nullptr) {
|
||||
continue;
|
||||
}
|
||||
// TODO(Triang3l): If heap creation has failed at least once in this frame,
|
||||
// don't try to allocate heaps until the next frame.
|
||||
if (heap_creation_failed_) {
|
||||
// Don't try to create a heap for every vertex buffer or texture in the
|
||||
// current frame anymore if have failed at least once.
|
||||
return false;
|
||||
}
|
||||
auto provider = context_->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
auto direct_queue = provider->GetDirectQueue();
|
||||
|
@ -98,6 +134,7 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
|
|||
heap_desc.SizeInBytes = kHeapSize;
|
||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) {
|
||||
heap_creation_failed_ = true;
|
||||
return false;
|
||||
}
|
||||
D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
|
||||
|
@ -111,11 +148,16 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
|
|||
D3D12_TILE_RANGE_FLAGS range_flags = D3D12_TILE_RANGE_FLAG_NONE;
|
||||
UINT heap_range_start_offset = 0;
|
||||
UINT range_tile_count = kHeapSize >> kTileSizeLog2;
|
||||
// FIXME(Triang3l): This may cause issues if the emulator is shut down
|
||||
// mid-frame and the heaps are destroyed before tile mappings are updated
|
||||
// (AwaitAllFramesCompletion won't catch this then). Defer this until the
|
||||
// actual command list submission at the end of the frame.
|
||||
direct_queue->UpdateTileMappings(
|
||||
buffer_, 1, &region_start_coordinates, &region_size, heaps_[i], 1,
|
||||
&range_flags, &heap_range_start_offset, &range_tile_count,
|
||||
D3D12_TILE_MAPPING_FLAG_NONE);
|
||||
}
|
||||
// TODO(Triang3l): Mark the range for upload.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
#ifndef XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
||||
#define XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||
|
||||
|
@ -19,22 +22,28 @@ namespace d3d12 {
|
|||
|
||||
// Manages memory for unconverted textures, resolve targets, vertex and index
|
||||
// buffers that can be accessed from shaders with Xenon physical addresses, with
|
||||
// 4 KB granularity.
|
||||
// system page size granularity.
|
||||
class SharedMemory {
|
||||
public:
|
||||
SharedMemory(ui::d3d12::D3D12Context* context);
|
||||
SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context);
|
||||
~SharedMemory();
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Ensures the backing memory for the address range is present in the tiled
|
||||
// buffer, allocating if needed. If couldn't allocate, false is returned -
|
||||
// it's unsafe to use this portion (on tiled resources tier 1 at least).
|
||||
bool EnsureRangeAllocated(uint32_t start, uint32_t length);
|
||||
void BeginFrame();
|
||||
|
||||
// Marks the range as used in this frame, queues it for upload if it was
|
||||
// modified. Ensures the backing memory for the address range is present in
|
||||
// the tiled buffer, allocating if needed. If couldn't allocate, false is
|
||||
// returned - it's unsafe to use this portion (on tiled resources tier 1 at
|
||||
// least).
|
||||
bool UseRange(uint32_t start, uint32_t length);
|
||||
|
||||
private:
|
||||
ui::d3d12::D3D12Context* context_ = nullptr;
|
||||
Memory* memory_;
|
||||
|
||||
ui::d3d12::D3D12Context* context_;
|
||||
|
||||
// The 512 MB tiled buffer.
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
|
@ -51,11 +60,29 @@ class SharedMemory {
|
|||
static constexpr uint32_t kHeapSize = 1 << kHeapSizeLog2;
|
||||
// Resident portions of the tiled buffer.
|
||||
ID3D12Heap* heaps_[kBufferSize >> kHeapSizeLog2] = {};
|
||||
// Whether creation of a heap has failed in the current frame.
|
||||
bool heap_creation_failed_ = false;
|
||||
|
||||
// Log2 of system page size.
|
||||
uint32_t page_size_log2_;
|
||||
// Total physical page count.
|
||||
uint32_t page_count_;
|
||||
|
||||
// Bit vector containing whether physical memory system pages are up to date.
|
||||
std::vector<uint64_t> pages_in_sync_;
|
||||
|
||||
// Watched page management - must be synchronized.
|
||||
std::mutex watch_mutex_;
|
||||
// Whether each physical page is watched by the GPU (after uploading).
|
||||
// Once a watch is triggered, it's not watched anymore.
|
||||
std::vector<uint64_t> watched_pages_;
|
||||
// Whether each page was modified while the current frame is being processed.
|
||||
// This is checked and cleared in the beginning of a GPU frame.
|
||||
// Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
|
||||
// so unmodified pages can be skipped quickly, and clearing is also fast.
|
||||
// On L1, each bit corresponds to a single page, on L2, to 64 pages.
|
||||
std::vector<uint64_t> watches_triggered_l1_;
|
||||
std::vector<uint64_t> watches_triggered_l2_;
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
Loading…
Reference in New Issue