[D3D12] Shared memory watch structures
This commit is contained in:
parent
e6ec390a04
commit
005040e885
|
@ -30,7 +30,7 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
|
||||||
assert_true(page_bitmap_length != 0);
|
assert_true(page_bitmap_length != 0);
|
||||||
|
|
||||||
valid_pages_.resize(page_bitmap_length);
|
valid_pages_.resize(page_bitmap_length);
|
||||||
watched_pages_.resize(page_bitmap_length);
|
protected_pages_.resize(page_bitmap_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
SharedMemory::~SharedMemory() { Shutdown(); }
|
SharedMemory::~SharedMemory() { Shutdown(); }
|
||||||
|
@ -76,13 +76,13 @@ bool SharedMemory::Initialize() {
|
||||||
|
|
||||||
std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t));
|
std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t));
|
||||||
|
|
||||||
std::memset(watched_pages_.data(), 0,
|
std::memset(protected_pages_.data(), 0,
|
||||||
watched_pages_.size() * sizeof(uint64_t));
|
protected_pages_.size() * sizeof(uint64_t));
|
||||||
|
|
||||||
upload_buffer_pool_ =
|
upload_buffer_pool_ =
|
||||||
std::make_unique<ui::d3d12::UploadBufferPool>(context_, 4 * 1024 * 1024);
|
std::make_unique<ui::d3d12::UploadBufferPool>(context_, 4 * 1024 * 1024);
|
||||||
|
|
||||||
memory_->SetGlobalPhysicalAccessWatch(WatchCallbackThunk, this);
|
memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -179,7 +179,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upload and watch used ranges.
|
// Upload and protect used ranges.
|
||||||
GetRangesToUpload(start >> page_size_log2_,
|
GetRangesToUpload(start >> page_size_log2_,
|
||||||
((start & ((1 << page_size_log2_) - 1)) + length +
|
((start & ((1 << page_size_log2_) - 1)) + length +
|
||||||
((1 << page_size_log2_) - 1)) >>
|
((1 << page_size_log2_) - 1)) >>
|
||||||
|
@ -239,7 +239,7 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
|
||||||
valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1;
|
valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1;
|
||||||
}
|
}
|
||||||
valid_pages_[i] |= valid_bits;
|
valid_pages_[i] |= valid_bits;
|
||||||
watched_pages_[i] |= valid_bits;
|
protected_pages_[i] |= valid_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
memory_->ProtectPhysicalMemory(
|
memory_->ProtectPhysicalMemory(
|
||||||
|
@ -309,27 +309,29 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SharedMemory::WatchCallbackThunk(void* context_ptr, uint32_t address) {
|
bool SharedMemory::MemoryWriteCallbackThunk(void* context_ptr,
|
||||||
return reinterpret_cast<SharedMemory*>(context_ptr)->WatchCallback(address);
|
uint32_t address) {
|
||||||
|
SharedMemory* shared_memory = reinterpret_cast<SharedMemory*>(context_ptr);
|
||||||
|
return shared_memory->MemoryWriteCallback(address);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SharedMemory::WatchCallback(uint32_t address) {
|
bool SharedMemory::MemoryWriteCallback(uint32_t address) {
|
||||||
uint32_t page_index = (address & kAddressMask) >> page_size_log2_;
|
uint32_t page_index = (address & kAddressMask) >> page_size_log2_;
|
||||||
uint32_t block_index = page_index >> 6;
|
uint32_t block_index = page_index >> 6;
|
||||||
uint64_t page_bit = 1ull << (page_index & 63);
|
uint64_t page_bit = 1ull << (page_index & 63);
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(validity_mutex_);
|
std::lock_guard<std::mutex> lock(validity_mutex_);
|
||||||
|
|
||||||
if (!(watched_pages_[block_index] & page_bit)) {
|
if (!(protected_pages_[block_index] & page_bit)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
valid_pages_[block_index] &= ~page_bit;
|
valid_pages_[block_index] &= ~page_bit;
|
||||||
// TODO(Triang3l): Invoke texture invalidation callbacks.
|
// TODO(Triang3l): Invoke watch callbacks.
|
||||||
|
|
||||||
memory_->UnprotectPhysicalMemory(page_index << page_size_log2_,
|
memory_->UnprotectPhysicalMemory(page_index << page_size_log2_,
|
||||||
1 << page_size_log2_, false);
|
1 << page_size_log2_, false);
|
||||||
watched_pages_[block_index] &= ~page_bit;
|
protected_pages_[block_index] &= ~page_bit;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,18 @@ class SharedMemory {
|
||||||
// The draw command list is needed for the transition.
|
// The draw command list is needed for the transition.
|
||||||
void EndFrame();
|
void EndFrame();
|
||||||
|
|
||||||
|
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument);
|
||||||
|
typedef void* WatchHandle;
|
||||||
|
// Registers a callback invoked when something is written to the specified
|
||||||
|
// memory range by the CPU or (if triggered explicitly - such as by a resolve)
|
||||||
|
// the GPU. Generally the context is the subsystem pointer (for example, the
|
||||||
|
// texture cache), the data is the object (such as a texture), and the
|
||||||
|
// argument is additional subsystem/object-specific data (such as whether the
|
||||||
|
// range belongs to the base mip level or to the rest of the mips).
|
||||||
|
WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
|
||||||
|
WatchCallback callback, void* callback_context,
|
||||||
|
void* callback_data, uint64_t callback_argument);
|
||||||
|
|
||||||
// Checks if the range has been updated, uploads new data if needed and
|
// Checks if the range has been updated, uploads new data if needed and
|
||||||
// ensures the buffer tiles backing the range are resident. May transition the
|
// ensures the buffer tiles backing the range are resident. May transition the
|
||||||
// tiled buffer to copy destination - call this before UseForReading or
|
// tiled buffer to copy destination - call this before UseForReading or
|
||||||
|
@ -91,11 +103,71 @@ class SharedMemory {
|
||||||
// Mutex between the exception handler and the command processor, to be locked
|
// Mutex between the exception handler and the command processor, to be locked
|
||||||
// when checking or updating validity of pages/ranges.
|
// when checking or updating validity of pages/ranges.
|
||||||
std::mutex validity_mutex_;
|
std::mutex validity_mutex_;
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
// Things below should be protected by validity_mutex_.
|
||||||
|
// ***************************************************************************
|
||||||
|
|
||||||
// Bit vector containing whether physical memory system pages are up to date.
|
// Bit vector containing whether physical memory system pages are up to date.
|
||||||
std::vector<uint64_t> valid_pages_;
|
std::vector<uint64_t> valid_pages_;
|
||||||
// Mark the memory range as updated and watch it.
|
// Mark the memory range as updated and protect it.
|
||||||
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count);
|
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count);
|
||||||
|
|
||||||
|
// Whether each physical page is protected by the GPU code (after uploading).
|
||||||
|
std::vector<uint64_t> protected_pages_;
|
||||||
|
// Memory access callback.
|
||||||
|
static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
|
||||||
|
bool MemoryWriteCallback(uint32_t address);
|
||||||
|
|
||||||
|
// Watched range placed by other GPU subsystems.
|
||||||
|
struct WatchRange {
|
||||||
|
WatchCallback callback;
|
||||||
|
void* callback_context;
|
||||||
|
void* callback_data;
|
||||||
|
uint64_t callback_argument;
|
||||||
|
struct WatchNode* node_first;
|
||||||
|
uint32_t page_first;
|
||||||
|
uint32_t page_last;
|
||||||
|
};
|
||||||
|
// Node for faster checking of watches when pages have been written to - all
|
||||||
|
// 512 MB are split into smaller equally sized buckets, and then ranges are
|
||||||
|
// linearly checked.
|
||||||
|
struct WatchNode {
|
||||||
|
WatchRange* range;
|
||||||
|
// Links to nodes belonging to other watched ranges in the bucket.
|
||||||
|
WatchNode* bucket_node_previous;
|
||||||
|
WatchNode* bucket_node_next;
|
||||||
|
// Link to another node of this watched range in the next bucket.
|
||||||
|
WatchNode* range_node_next;
|
||||||
|
};
|
||||||
|
static constexpr uint32_t kWatchBucketSizeLog2 = 22;
|
||||||
|
static constexpr uint32_t kWatchBucketCount =
|
||||||
|
1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
|
||||||
|
WatchNode* watch_buckets_[kWatchBucketCount] = {};
|
||||||
|
// Allocations in pools - taking new WatchRanges and WatchNodes from the free
|
||||||
|
// list, and if there are none, creating a pool if the current one is fully
|
||||||
|
// used, and linearly allocating from the current pool.
|
||||||
|
union WatchRangeAllocation {
|
||||||
|
WatchRange range;
|
||||||
|
WatchRangeAllocation* next_free;
|
||||||
|
};
|
||||||
|
union WatchNodeAllocation {
|
||||||
|
WatchNode node;
|
||||||
|
WatchNodeAllocation* next_free;
|
||||||
|
};
|
||||||
|
static constexpr uint32_t kWatchRangePoolSize = 8192;
|
||||||
|
static constexpr uint32_t kWatchNodePoolSize = 8192;
|
||||||
|
std::vector<WatchRangeAllocation*> watch_range_pools_;
|
||||||
|
std::vector<WatchNodeAllocation*> watch_node_pools_;
|
||||||
|
uint32_t watch_range_current_pool_allocated_ = 0;
|
||||||
|
uint32_t watch_node_current_pool_allocated_ = 0;
|
||||||
|
WatchRangeAllocation* watch_range_first_free = nullptr;
|
||||||
|
WatchNodeAllocation* watch_node_first_free = nullptr;
|
||||||
|
|
||||||
|
// ***************************************************************************
|
||||||
|
// Things above should be protected by validity_mutex_.
|
||||||
|
// ***************************************************************************
|
||||||
|
|
||||||
// First page and length in pages.
|
// First page and length in pages.
|
||||||
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
||||||
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
||||||
|
@ -105,13 +177,6 @@ class SharedMemory {
|
||||||
uint32_t request_page_count);
|
uint32_t request_page_count);
|
||||||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||||
|
|
||||||
// Whether each physical page is watched by the GPU (after uploading).
|
|
||||||
// Once a watch is triggered, it's not watched anymore.
|
|
||||||
std::vector<uint64_t> watched_pages_;
|
|
||||||
// Memory access callback.
|
|
||||||
static bool WatchCallbackThunk(void* context_ptr, uint32_t address);
|
|
||||||
bool WatchCallback(uint32_t address);
|
|
||||||
|
|
||||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
||||||
ID3D12GraphicsCommandList* command_list);
|
ID3D12GraphicsCommandList* command_list);
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue