[D3D12] SHM watch creation

Triang3l 2018-08-18 00:16:55 +03:00
parent 005040e885
commit 9e21f5ab67
2 changed files with 133 additions and 28 deletions

src/xenia/gpu/d3d12/shared_memory.cc

@@ -115,6 +115,77 @@ void SharedMemory::BeginFrame() {
 
 void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
 
+SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
+    uint32_t start, uint32_t length, WatchCallback callback,
+    void* callback_context, void* callback_data, uint64_t callback_argument) {
+  start &= kAddressMask;
+  if (start >= kBufferSize || length == 0) {
+    return nullptr;
+  }
+  length = std::min(length, kBufferSize - start);
+  uint32_t watch_page_first = start >> page_size_log2_;
+  uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
+  uint32_t bucket_first =
+      watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  uint32_t bucket_last =
+      watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
+
+  std::lock_guard<std::mutex> lock(validity_mutex_);
+
+  // Allocate the range.
+  WatchRange* range = watch_range_first_free_;
+  if (range != nullptr) {
+    watch_range_first_free_ = range->next_free;
+  } else {
+    if (watch_range_pools_.empty() ||
+        watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
+      watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
+      watch_range_current_pool_allocated_ = 0;
+    }
+    range =
+        &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
+  }
+  range->callback = callback;
+  range->callback_context = callback_context;
+  range->callback_data = callback_data;
+  range->callback_argument = callback_argument;
+  range->page_first = watch_page_first;
+  range->page_last = watch_page_last;
+
+  // Allocate and link the nodes.
+  WatchNode* node_previous = nullptr;
+  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
+    WatchNode* node = watch_node_first_free_;
+    if (node != nullptr) {
+      watch_node_first_free_ = node->next_free;
+    } else {
+      if (watch_node_pools_.empty() ||
+          watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
+        watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
+        watch_node_current_pool_allocated_ = 0;
+      }
+      node =
+          &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
+    }
+    node->range = range;
+    node->range_node_next = nullptr;
+    if (node_previous != nullptr) {
+      node_previous->range_node_next = node;
+    } else {
+      range->node_first = node;
+    }
+    node_previous = node;
+    node->bucket_node_previous = nullptr;
+    node->bucket_node_next = watch_buckets_[i];
+    if (watch_buckets_[i] != nullptr) {
+      watch_buckets_[i]->bucket_node_previous = node;
+    }
+    watch_buckets_[i] = node;
+  }
+
+  return reinterpret_cast<WatchHandle>(range);
+}
+
 bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
                                 ID3D12GraphicsCommandList* command_list) {
   if (length == 0) {
@@ -122,7 +193,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
     return true;
   }
   start &= kAddressMask;
-  if ((kBufferSize - start) < length) {
+  if (start >= kBufferSize || (kBufferSize - start) < length) {
     // Exceeds the physical address space.
     return false;
   }
@@ -247,6 +318,29 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
       cpu::MMIOHandler::WatchType::kWatchWrite, false);
 }
 
+void SharedMemory::UnlinkWatchRange(WatchRange* range) {
+  uint32_t bucket =
+      range->page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  WatchNode* node = range->node_first;
+  while (node != nullptr) {
+    WatchNode* node_next = node->range_node_next;
+    if (node->bucket_node_previous != nullptr) {
+      node->bucket_node_previous->bucket_node_next = node->bucket_node_next;
+    } else {
+      watch_buckets_[bucket] = node->bucket_node_next;
+    }
+    if (node->bucket_node_next != nullptr) {
+      node->bucket_node_next->bucket_node_previous = node->bucket_node_previous;
+    }
+    node->next_free = watch_node_first_free_;
+    watch_node_first_free_ = node;
+    node = node_next;
+    ++bucket;
+  }
+  range->next_free = watch_range_first_free_;
+  watch_range_first_free_ = range;
+}
+
 void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
                                      uint32_t request_page_count) {
   upload_ranges_.clear();
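
To make the bucket arithmetic in WatchMemoryRange concrete: with kWatchBucketSizeLog2 = 22, each bucket covers 4 MB, so the 512 MB space has 128 buckets, and a watch gets one node per bucket it overlaps. The back-and-forth shift (page << page_size_log2_ >> kWatchBucketSizeLog2) just converts a page index back to a byte address before taking the bucket index. A small self-contained check of the shifts (an illustration, not code from this commit; the 4 KB page size is an assumption, page_size_log2_ is set elsewhere in the class):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t kWatchBucketSizeLog2 = 22;  // 4 MB buckets, as in the diff.
  const uint32_t page_size_log2 = 12;        // Assumed 4 KB pages.
  // An 8 KB watch starting 4 KB below the first 4 MB boundary.
  uint32_t start = (4u << 20) - (4u << 10);
  uint32_t length = 8u << 10;
  uint32_t page_first = start >> page_size_log2;                // 1023
  uint32_t page_last = (start + length - 1) >> page_size_log2;  // 1024
  uint32_t bucket_first = page_first << page_size_log2 >> kWatchBucketSizeLog2;
  uint32_t bucket_last = page_last << page_size_log2 >> kWatchBucketSizeLog2;
  // The range straddles the 4 MB boundary, so WatchMemoryRange would allocate
  // two WatchNodes for it - one linked into each bucket's list.
  assert(bucket_first == 0 && bucket_last == 1);
  return 0;
}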

src/xenia/gpu/d3d12/shared_memory.h

@@ -47,10 +47,16 @@ class SharedMemory {
   typedef void* WatchHandle;
   // Registers a callback invoked when something is written to the specified
   // memory range by the CPU or (if triggered explicitly - such as by a resolve)
-  // the GPU. Generally the context is the subsystem pointer (for example, the
+  // the GPU.
+  //
+  // Generally the context is the subsystem pointer (for example, the
   // texture cache), the data is the object (such as a texture), and the
   // argument is additional subsystem/object-specific data (such as whether the
   // range belongs to the base mip level or to the rest of the mips).
+  //
+  // The callback is called with the mutex locked. Do NOT watch or unwatch
+  // ranges from within it! The watch for the callback is cancelled after the
+  // callback.
   WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
                                WatchCallback callback, void* callback_context,
                                void* callback_data, uint64_t callback_argument);
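
A caller-side sketch of this contract, from a hypothetical subsystem. The names here are illustrative, and the WatchCallback signature used (void(void* context, void* data, uint64_t argument)) is assumed, since the typedef itself is outside this diff:

// Hypothetical subsystem object watching a range of guest memory.
struct Texture {
  uint32_t guest_address;
  uint32_t size;
  bool pending_invalidation;
  SharedMemory::WatchHandle watch_handle;
};

void TextureWrittenCallback(void* context, void* data, uint64_t argument) {
  // Runs with the mutex held: only record the event. Don't call
  // WatchMemoryRange from here, and there's no need to unwatch - the watch
  // is cancelled automatically after the callback returns.
  reinterpret_cast<Texture*>(data)->pending_invalidation = true;
}

void WatchTexture(SharedMemory* shared_memory, void* texture_cache,
                  Texture* texture) {
  // context = subsystem, data = object, argument = object-specific detail.
  texture->watch_handle = shared_memory->WatchMemoryRange(
      texture->guest_address, texture->size, TextureWrittenCallback,
      texture_cache, texture, 0);
}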
@@ -119,50 +125,55 @@ class SharedMemory {
   static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
   bool MemoryWriteCallback(uint32_t address);
 
+  struct WatchNode;
   // Watched range placed by other GPU subsystems.
   struct WatchRange {
-    WatchCallback callback;
-    void* callback_context;
-    void* callback_data;
-    uint64_t callback_argument;
-    struct WatchNode* node_first;
-    uint32_t page_first;
-    uint32_t page_last;
+    union {
+      struct {
+        WatchCallback callback;
+        void* callback_context;
+        void* callback_data;
+        uint64_t callback_argument;
+        WatchNode* node_first;
+        uint32_t page_first;
+        uint32_t page_last;
+      };
+      WatchRange* next_free;
+    };
   };
   // Node for faster checking of watches when pages have been written to - all
   // 512 MB are split into smaller equally sized buckets, and then ranges are
   // linearly checked.
   struct WatchNode {
-    WatchRange* range;
-    // Links to nodes belonging to other watched ranges in the bucket.
-    WatchNode* bucket_node_previous;
-    WatchNode* bucket_node_next;
-    // Link to another node of this watched range in the next bucket.
-    WatchNode* range_node_next;
+    union {
+      struct {
+        WatchRange* range;
+        // Link to another node of this watched range in the next bucket.
+        WatchNode* range_node_next;
+        // Links to nodes belonging to other watched ranges in the bucket.
+        WatchNode* bucket_node_previous;
+        WatchNode* bucket_node_next;
+      };
+      WatchNode* next_free;
+    };
   };
   static constexpr uint32_t kWatchBucketSizeLog2 = 22;
   static constexpr uint32_t kWatchBucketCount =
       1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
   WatchNode* watch_buckets_[kWatchBucketCount] = {};
-  // Allocations in pools - taking new WatchRanges and WatchNodes from the free
+  // Allocation from pools - taking new WatchRanges and WatchNodes from the free
   // list, and if there are none, creating a pool if the current one is fully
   // used, and linearly allocating from the current pool.
-  union WatchRangeAllocation {
-    WatchRange range;
-    WatchRangeAllocation* next_free;
-  };
-  union WatchNodeAllocation {
-    WatchNode node;
-    WatchNodeAllocation* next_free;
-  };
   static constexpr uint32_t kWatchRangePoolSize = 8192;
   static constexpr uint32_t kWatchNodePoolSize = 8192;
-  std::vector<WatchRangeAllocation*> watch_range_pools_;
-  std::vector<WatchNodeAllocation*> watch_node_pools_;
+  std::vector<WatchRange*> watch_range_pools_;
+  std::vector<WatchNode*> watch_node_pools_;
   uint32_t watch_range_current_pool_allocated_ = 0;
   uint32_t watch_node_current_pool_allocated_ = 0;
-  WatchRangeAllocation* watch_range_first_free = nullptr;
-  WatchNodeAllocation* watch_node_first_free = nullptr;
+  WatchRange* watch_range_first_free_ = nullptr;
+  WatchNode* watch_node_first_free_ = nullptr;
+  // Unlinks and frees the range and its nodes. Call this with the mutex locked.
+  void UnlinkWatchRange(WatchRange* range);
 
   // ***************************************************************************
   // Things above should be protected by validity_mutex_.
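
The anonymous unions above are what let this commit drop the old WatchRangeAllocation/WatchNodeAllocation wrappers: a live object uses the payload members, and a freed one reuses the same storage as its free-list link. A standalone sketch of the pattern with a dummy payload (illustration only, simplified names, not code from this commit):

#include <cstdint>
#include <vector>

// A pooled, free-list-recycled object in the style of WatchRange/WatchNode.
struct Item {
  union {
    struct {
      uint32_t page_first;
      uint32_t page_last;
    };
    // Valid only while the item is on the free list; overlays the payload.
    Item* next_free;
  };
};

constexpr uint32_t kPoolSize = 8192;
std::vector<Item*> pools;
uint32_t current_pool_allocated = 0;
Item* first_free = nullptr;

Item* Allocate() {
  Item* item = first_free;
  if (item != nullptr) {
    // Reuse a previously freed item.
    first_free = item->next_free;
  } else {
    // Take the next slot in the current pool, creating a new pool when the
    // current one is exhausted (or none exists yet).
    if (pools.empty() || current_pool_allocated >= kPoolSize) {
      pools.push_back(new Item[kPoolSize]);
      current_pool_allocated = 0;
    }
    item = &pools.back()[current_pool_allocated++];
  }
  return item;
}

void Free(Item* item) {
  // Push onto the free list; the payload fields become invalid.
  item->next_free = first_free;
  first_free = item;
}

Both operations are O(1), nothing is returned to the heap while the emulator runs, and pointers stay stable because existing pools are never reallocated - which is why WatchHandle can simply be the WatchRange pointer handed back by WatchMemoryRange.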