[D3D12] SHM watch creation
This commit is contained in:
parent
005040e885
commit
9e21f5ab67
|
@ -115,6 +115,77 @@ void SharedMemory::BeginFrame() {
|
||||||
|
|
||||||
void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
|
void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
|
||||||
|
|
||||||
|
// Registers a watch over [start, start + length) and returns an opaque handle
// to it.
//
// `start` is masked with kAddressMask first. Returns nullptr when the masked
// start lies outside the buffer or `length` is zero; otherwise `length` is
// clamped so the range ends within the buffer. The callback and its three
// user values are stored in the range verbatim.
//
// validity_mutex_ is taken for the whole allocation and linking phase, so this
// must not be called while the mutex is already held by the calling thread.
SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
    uint32_t start, uint32_t length, WatchCallback callback,
    void* callback_context, void* callback_data, uint64_t callback_argument) {
  start &= kAddressMask;
  if (start >= kBufferSize || length == 0) {
    return nullptr;
  }
  length = std::min(length, kBufferSize - start);

  // Pages covered by the range (inclusive), and the buckets those pages fall
  // into (inclusive).
  uint32_t watch_page_first = start >> page_size_log2_;
  uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
  uint32_t bucket_first =
      watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
  uint32_t bucket_last =
      watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;

  std::lock_guard<std::mutex> lock(validity_mutex_);

  // Allocate the range: reuse a freed one if available, otherwise take the
  // next unused entry from the current pool, creating a new pool first if
  // there is none yet or the current one is exhausted.
  WatchRange* range = watch_range_first_free_;
  if (range != nullptr) {
    watch_range_first_free_ = range->next_free;
  } else {
    if (watch_range_pools_.empty() ||
        watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
      watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
      watch_range_current_pool_allocated_ = 0;
    }
    // This must happen regardless of whether a new pool was just created -
    // otherwise a partially used pool would yield a null range.
    range = &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
  }
  range->callback = callback;
  range->callback_context = callback_context;
  range->callback_data = callback_data;
  range->callback_argument = callback_argument;
  range->page_first = watch_page_first;
  range->page_last = watch_page_last;

  // Allocate and link the nodes - one per bucket touched by the range. Nodes
  // of the same range are chained in bucket order via range_node_next, and
  // each node is inserted at the head of its bucket's doubly-linked list.
  WatchNode* node_previous = nullptr;
  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
    WatchNode* node = watch_node_first_free_;
    if (node != nullptr) {
      watch_node_first_free_ = node->next_free;
    } else {
      if (watch_node_pools_.empty() ||
          watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
        watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
        watch_node_current_pool_allocated_ = 0;
      }
      // Same as for ranges - allocate linearly even if the pool already
      // existed and still had room.
      node = &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
    }
    node->range = range;
    node->range_node_next = nullptr;
    if (node_previous != nullptr) {
      node_previous->range_node_next = node;
    } else {
      range->node_first = node;
    }
    node_previous = node;
    node->bucket_node_previous = nullptr;
    node->bucket_node_next = watch_buckets_[i];
    if (watch_buckets_[i] != nullptr) {
      watch_buckets_[i]->bucket_node_previous = node;
    }
    watch_buckets_[i] = node;
  }

  return static_cast<WatchHandle>(range);
}
|
||||||
|
|
||||||
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||||
ID3D12GraphicsCommandList* command_list) {
|
ID3D12GraphicsCommandList* command_list) {
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
|
@ -122,7 +193,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
start &= kAddressMask;
|
start &= kAddressMask;
|
||||||
if ((kBufferSize - start) < length) {
|
if (start >= kBufferSize || (kBufferSize - start) < length) {
|
||||||
// Exceeds the physical address space.
|
// Exceeds the physical address space.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -247,6 +318,29 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
|
||||||
cpu::MMIOHandler::WatchType::kWatchWrite, false);
|
cpu::MMIOHandler::WatchType::kWatchWrite, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detaches every node of `range` from its bucket list, returns the nodes and
// then the range itself to their free lists. The bucket index starts at the
// bucket containing the range's first page and advances by one per node.
void SharedMemory::UnlinkWatchRange(WatchRange* range) {
  uint32_t bucket_index =
      range->page_first << page_size_log2_ >> kWatchBucketSizeLog2;
  for (WatchNode *current = range->node_first, *following = nullptr;
       current != nullptr; current = following, ++bucket_index) {
    // Read all links before the node goes to the free list - next_free shares
    // storage with the link fields.
    following = current->range_node_next;
    WatchNode* const before = current->bucket_node_previous;
    WatchNode* const after = current->bucket_node_next;

    // Splice the node out of the bucket's doubly-linked list.
    if (before == nullptr) {
      watch_buckets_[bucket_index] = after;
    } else {
      before->bucket_node_next = after;
    }
    if (after != nullptr) {
      after->bucket_node_previous = before;
    }

    // Push the node onto the node free list.
    current->next_free = watch_node_first_free_;
    watch_node_first_free_ = current;
  }

  // Finally, push the range onto the range free list.
  range->next_free = watch_range_first_free_;
  watch_range_first_free_ = range;
}
|
||||||
|
|
||||||
void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
||||||
uint32_t request_page_count) {
|
uint32_t request_page_count) {
|
||||||
upload_ranges_.clear();
|
upload_ranges_.clear();
|
||||||
|
|
|
@ -47,10 +47,16 @@ class SharedMemory {
|
||||||
typedef void* WatchHandle;
|
typedef void* WatchHandle;
|
||||||
// Registers a callback invoked when something is written to the specified
|
// Registers a callback invoked when something is written to the specified
|
||||||
// memory range by the CPU or (if triggered explicitly - such as by a resolve)
|
// memory range by the CPU or (if triggered explicitly - such as by a resolve)
|
||||||
// the GPU. Generally the context is the subsystem pointer (for example, the
|
// the GPU.
|
||||||
|
//
|
||||||
|
// Generally the context is the subsystem pointer (for example, the
|
||||||
// texture cache), the data is the object (such as a texture), and the
|
// texture cache), the data is the object (such as a texture), and the
|
||||||
// argument is additional subsystem/object-specific data (such as whether the
|
// argument is additional subsystem/object-specific data (such as whether the
|
||||||
// range belongs to the base mip level or to the rest of the mips).
|
// range belongs to the base mip level or to the rest of the mips).
|
||||||
|
//
|
||||||
|
// The callback is called with the mutex locked. Do NOT watch or unwatch
|
||||||
|
// ranges from within it! The watch for the callback is cancelled after the
|
||||||
|
// callback.
|
||||||
WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
|
WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
|
||||||
WatchCallback callback, void* callback_context,
|
WatchCallback callback, void* callback_context,
|
||||||
void* callback_data, uint64_t callback_argument);
|
void* callback_data, uint64_t callback_argument);
|
||||||
|
@ -119,50 +125,55 @@ class SharedMemory {
|
||||||
static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
|
static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
|
||||||
bool MemoryWriteCallback(uint32_t address);
|
bool MemoryWriteCallback(uint32_t address);
|
||||||
|
|
||||||
|
struct WatchNode;
|
||||||
// Watched range placed by other GPU subsystems.
|
// Watched range placed by other GPU subsystems.
|
||||||
struct WatchRange {
|
struct WatchRange {
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
WatchCallback callback;
|
WatchCallback callback;
|
||||||
void* callback_context;
|
void* callback_context;
|
||||||
void* callback_data;
|
void* callback_data;
|
||||||
uint64_t callback_argument;
|
uint64_t callback_argument;
|
||||||
struct WatchNode* node_first;
|
WatchNode* node_first;
|
||||||
uint32_t page_first;
|
uint32_t page_first;
|
||||||
uint32_t page_last;
|
uint32_t page_last;
|
||||||
};
|
};
|
||||||
|
WatchRange* next_free;
|
||||||
|
};
|
||||||
|
};
|
||||||
// Node for faster checking of watches when pages have been written to - all
|
// Node for faster checking of watches when pages have been written to - all
|
||||||
// 512 MB are split into smaller equally sized buckets, and then ranges are
|
// 512 MB are split into smaller equally sized buckets, and then ranges are
|
||||||
// linearly checked.
|
// linearly checked.
|
||||||
struct WatchNode {
|
struct WatchNode {
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
WatchRange* range;
|
WatchRange* range;
|
||||||
|
// Link to another node of this watched range in the next bucket.
|
||||||
|
WatchNode* range_node_next;
|
||||||
// Links to nodes belonging to other watched ranges in the bucket.
|
// Links to nodes belonging to other watched ranges in the bucket.
|
||||||
WatchNode* bucket_node_previous;
|
WatchNode* bucket_node_previous;
|
||||||
WatchNode* bucket_node_next;
|
WatchNode* bucket_node_next;
|
||||||
// Link to another node of this watched range in the next bucket.
|
};
|
||||||
WatchNode* range_node_next;
|
WatchNode* next_free;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
static constexpr uint32_t kWatchBucketSizeLog2 = 22;
|
static constexpr uint32_t kWatchBucketSizeLog2 = 22;
|
||||||
static constexpr uint32_t kWatchBucketCount =
|
static constexpr uint32_t kWatchBucketCount =
|
||||||
1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
|
1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
|
||||||
WatchNode* watch_buckets_[kWatchBucketCount] = {};
|
WatchNode* watch_buckets_[kWatchBucketCount] = {};
|
||||||
// Allocations in pools - taking new WatchRanges and WatchNodes from the free
|
// Allocation from pools - taking new WatchRanges and WatchNodes from the free
|
||||||
// list, and if there are none, creating a pool if the current one is fully
|
// list, and if there are none, creating a pool if the current one is fully
|
||||||
// used, and linearly allocating from the current pool.
|
// used, and linearly allocating from the current pool.
|
||||||
union WatchRangeAllocation {
|
|
||||||
WatchRange range;
|
|
||||||
WatchRangeAllocation* next_free;
|
|
||||||
};
|
|
||||||
union WatchNodeAllocation {
|
|
||||||
WatchNode node;
|
|
||||||
WatchNodeAllocation* next_free;
|
|
||||||
};
|
|
||||||
static constexpr uint32_t kWatchRangePoolSize = 8192;
|
static constexpr uint32_t kWatchRangePoolSize = 8192;
|
||||||
static constexpr uint32_t kWatchNodePoolSize = 8192;
|
static constexpr uint32_t kWatchNodePoolSize = 8192;
|
||||||
std::vector<WatchRangeAllocation*> watch_range_pools_;
|
std::vector<WatchRange*> watch_range_pools_;
|
||||||
std::vector<WatchNodeAllocation*> watch_node_pools_;
|
std::vector<WatchNode*> watch_node_pools_;
|
||||||
uint32_t watch_range_current_pool_allocated_ = 0;
|
uint32_t watch_range_current_pool_allocated_ = 0;
|
||||||
uint32_t watch_node_current_pool_allocated_ = 0;
|
uint32_t watch_node_current_pool_allocated_ = 0;
|
||||||
WatchRangeAllocation* watch_range_first_free = nullptr;
|
WatchRange* watch_range_first_free_ = nullptr;
|
||||||
WatchNodeAllocation* watch_node_first_free = nullptr;
|
WatchNode* watch_node_first_free_ = nullptr;
|
||||||
|
// Unlinks and frees the range and its nodes. Call this with the mutex locked.
|
||||||
|
void UnlinkWatchRange(WatchRange* range);
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// Things above should be protected by validity_mutex_.
|
// Things above should be protected by validity_mutex_.
|
||||||
|
|
Loading…
Reference in New Issue