diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index eb922cfff..ee71d43d1 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -115,6 +115,77 @@ void SharedMemory::BeginFrame() {
 
 void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
 
+SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
+    uint32_t start, uint32_t length, WatchCallback callback,
+    void* callback_context, void* callback_data, uint64_t callback_argument) {
+  start &= kAddressMask;
+  if (start >= kBufferSize || length == 0) {
+    return nullptr;
+  }
+  length = std::min(length, kBufferSize - start);
+  uint32_t watch_page_first = start >> page_size_log2_;
+  uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
+  uint32_t bucket_first =
+      watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  uint32_t bucket_last =
+      watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
+
+  std::lock_guard<std::mutex> lock(validity_mutex_);
+
+  // Allocate the range.
+  WatchRange* range = watch_range_first_free_;
+  if (range != nullptr) {
+    watch_range_first_free_ = range->next_free;
+  } else {
+    if (watch_range_pools_.empty() ||
+        watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
+      watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
+      watch_range_current_pool_allocated_ = 0;
+    }
+    range =
+        &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
+  }
+  range->callback = callback;
+  range->callback_context = callback_context;
+  range->callback_data = callback_data;
+  range->callback_argument = callback_argument;
+  range->page_first = watch_page_first;
+  range->page_last = watch_page_last;
+
+  // Allocate and link the nodes.
+  WatchNode* node_previous = nullptr;
+  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
+    WatchNode* node = watch_node_first_free_;
+    if (node != nullptr) {
+      watch_node_first_free_ = node->next_free;
+    } else {
+      if (watch_node_pools_.empty() ||
+          watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
+        watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
+        watch_node_current_pool_allocated_ = 0;
+      }
+      node =
+          &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
+    }
+    node->range = range;
+    node->range_node_next = nullptr;
+    if (node_previous != nullptr) {
+      node_previous->range_node_next = node;
+    } else {
+      range->node_first = node;
+    }
+    node_previous = node;
+    node->bucket_node_previous = nullptr;
+    node->bucket_node_next = watch_buckets_[i];
+    if (watch_buckets_[i] != nullptr) {
+      watch_buckets_[i]->bucket_node_previous = node;
+    }
+    watch_buckets_[i] = node;
+  }
+
+  return reinterpret_cast<WatchHandle>(range);
+}
+
 bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
                                 ID3D12GraphicsCommandList* command_list) {
   if (length == 0) {
@@ -122,7 +193,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
     return true;
   }
   start &= kAddressMask;
-  if ((kBufferSize - start) < length) {
+  if (start >= kBufferSize || (kBufferSize - start) < length) {
     // Exceeds the physical address space.
     return false;
   }
@@ -247,6 +318,29 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
       cpu::MMIOHandler::WatchType::kWatchWrite, false);
 }
 
+void SharedMemory::UnlinkWatchRange(WatchRange* range) {
+  uint32_t bucket =
+      range->page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  WatchNode* node = range->node_first;
+  while (node != nullptr) {
+    WatchNode* node_next = node->range_node_next;
+    if (node->bucket_node_previous != nullptr) {
+      node->bucket_node_previous->bucket_node_next = node->bucket_node_next;
+    } else {
+      watch_buckets_[bucket] = node->bucket_node_next;
+    }
+    if (node->bucket_node_next != nullptr) {
+      node->bucket_node_next->bucket_node_previous = node->bucket_node_previous;
+    }
+    node->next_free = watch_node_first_free_;
+    watch_node_first_free_ = node;
+    node = node_next;
+    ++bucket;
+  }
+  range->next_free = watch_range_first_free_;
+  watch_range_first_free_ = range;
+}
+
 void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
                                      uint32_t request_page_count) {
   upload_ranges_.clear();
diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index 1c6233b19..5d775d184 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -47,10 +47,16 @@ class SharedMemory {
   typedef void* WatchHandle;
   // Registers a callback invoked when something is written to the specified
   // memory range by the CPU or (if triggered explicitly - such as by a resolve)
-  // the GPU. Generally the context is the subsystem pointer (for example, the
+  // the GPU.
+  //
+  // Generally the context is the subsystem pointer (for example, the
   // texture cache), the data is the object (such as a texture), and the
   // argument is additional subsystem/object-specific data (such as whether the
   // range belongs to the base mip level or to the rest of the mips).
+  //
+  // The callback is called with the mutex locked. Do NOT watch or unwatch
+  // ranges from within it! The watch for the callback is cancelled after the
+  // callback.
   WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
                                WatchCallback callback, void* callback_context,
                                void* callback_data, uint64_t callback_argument);
@@ -119,50 +125,55 @@ class SharedMemory {
   static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
   bool MemoryWriteCallback(uint32_t address);
 
+  struct WatchNode;
   // Watched range placed by other GPU subsystems.
   struct WatchRange {
-    WatchCallback callback;
-    void* callback_context;
-    void* callback_data;
-    uint64_t callback_argument;
-    struct WatchNode* node_first;
-    uint32_t page_first;
-    uint32_t page_last;
+    union {
+      struct {
+        WatchCallback callback;
+        void* callback_context;
+        void* callback_data;
+        uint64_t callback_argument;
+        WatchNode* node_first;
+        uint32_t page_first;
+        uint32_t page_last;
+      };
+      WatchRange* next_free;
+    };
   };
   // Node for faster checking of watches when pages have been written to - all
   // 512 MB are split into smaller equally sized buckets, and then ranges are
   // linearly checked.
   struct WatchNode {
-    WatchRange* range;
-    // Links to nodes belonging to other watched ranges in the bucket.
-    WatchNode* bucket_node_previous;
-    WatchNode* bucket_node_next;
-    // Link to another node of this watched range in the next bucket.
-    WatchNode* range_node_next;
+    union {
+      struct {
+        WatchRange* range;
+        // Link to another node of this watched range in the next bucket.
+        WatchNode* range_node_next;
+        // Links to nodes belonging to other watched ranges in the bucket.
+        WatchNode* bucket_node_previous;
+        WatchNode* bucket_node_next;
+      };
+      WatchNode* next_free;
+    };
   };
   static constexpr uint32_t kWatchBucketSizeLog2 = 22;
   static constexpr uint32_t kWatchBucketCount =
       1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
   WatchNode* watch_buckets_[kWatchBucketCount] = {};
-  // Allocations in pools - taking new WatchRanges and WatchNodes from the free
+  // Allocation from pools - taking new WatchRanges and WatchNodes from the free
   // list, and if there are none, creating a pool if the current one is fully
   // used, and linearly allocating from the current pool.
-  union WatchRangeAllocation {
-    WatchRange range;
-    WatchRangeAllocation* next_free;
-  };
-  union WatchNodeAllocation {
-    WatchNode node;
-    WatchNodeAllocation* next_free;
-  };
   static constexpr uint32_t kWatchRangePoolSize = 8192;
   static constexpr uint32_t kWatchNodePoolSize = 8192;
-  std::vector<WatchRangeAllocation*> watch_range_pools_;
-  std::vector<WatchNodeAllocation*> watch_node_pools_;
+  std::vector<WatchRange*> watch_range_pools_;
+  std::vector<WatchNode*> watch_node_pools_;
   uint32_t watch_range_current_pool_allocated_ = 0;
   uint32_t watch_node_current_pool_allocated_ = 0;
-  WatchRangeAllocation* watch_range_first_free = nullptr;
-  WatchNodeAllocation* watch_node_first_free = nullptr;
+  WatchRange* watch_range_first_free_ = nullptr;
+  WatchNode* watch_node_first_free_ = nullptr;
+  // Unlinks and frees the range and its nodes. Call this with the mutex locked.
+  void UnlinkWatchRange(WatchRange* range);
 
   // ***************************************************************************
   // Things above should be protected by validity_mutex_.
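
Usage note (not part of the patch): the sketch below shows how a subsystem
would be expected to consume the new API. TextureCache and Texture are
hypothetical stand-ins, the xe::gpu::d3d12 namespace and include path are
assumed from the file paths above, and the callback signature
void(void* context, void* data, uint64_t argument) is assumed from the
WatchMemoryRange parameter list; none of this is taken verbatim from the
patch. It follows the contract documented in the header: the callback runs
with the mutex locked and the watch is cancelled after the callback, so the
callback only flags the object, and a new watch is registered later from
outside the callback.

#include <cstdint>

#include "xenia/gpu/d3d12/shared_memory.h"

// Hypothetical object tracked by a subsystem such as the texture cache.
struct Texture {
  uint32_t guest_address;     // Watched physical address, in bytes.
  uint32_t guest_size;        // Watched length, in bytes.
  bool pending_invalidation;  // Set by the watch callback.
};

class TextureCache {
 public:
  explicit TextureCache(xe::gpu::d3d12::SharedMemory* shared_memory)
      : shared_memory_(shared_memory) {}

  void WatchTexture(Texture* texture) {
    // context = subsystem, data = object, argument = object-specific detail
    // (unused here) - the convention documented in the header. The returned
    // WatchHandle is discarded here; it would be kept if the texture ever
    // needed to be unwatched explicitly.
    shared_memory_->WatchMemoryRange(texture->guest_address,
                                     texture->guest_size, WatchCallbackThunk,
                                     this, texture, 0);
  }

 private:
  static void WatchCallbackThunk(void* context, void* data,
                                 uint64_t argument) {
    // Runs with validity_mutex_ locked, and this watch is already cancelled:
    // only flag the object here, and let WatchTexture be called again later
    // (for example, when the texture is next bound), outside the callback.
    static_cast<Texture*>(data)->pending_invalidation = true;
  }

  xe::gpu::d3d12::SharedMemory* shared_memory_;
};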