[D3D12] SHM watch creation
parent 005040e885
commit 9e21f5ab67
@@ -115,6 +115,77 @@ void SharedMemory::BeginFrame() {
 
 void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
 
+SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
+    uint32_t start, uint32_t length, WatchCallback callback,
+    void* callback_context, void* callback_data, uint64_t callback_argument) {
+  start &= kAddressMask;
+  if (start >= kBufferSize || length == 0) {
+    return nullptr;
+  }
+  length = std::min(length, kBufferSize - start);
+  uint32_t watch_page_first = start >> page_size_log2_;
+  uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
+  uint32_t bucket_first =
+      watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  uint32_t bucket_last =
+      watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
+
+  std::lock_guard<std::mutex> lock(validity_mutex_);
+
+  // Allocate the range.
+  WatchRange* range = watch_range_first_free_;
+  if (range != nullptr) {
+    watch_range_first_free_ = range->next_free;
+  } else {
+    if (watch_range_pools_.empty() ||
+        watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
+      watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
+      watch_range_current_pool_allocated_ = 0;
+    }
+    range =
+        &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
+  }
+  range->callback = callback;
+  range->callback_context = callback_context;
+  range->callback_data = callback_data;
+  range->callback_argument = callback_argument;
+  range->page_first = watch_page_first;
+  range->page_last = watch_page_last;
+
+  // Allocate and link the nodes.
+  WatchNode* node_previous = nullptr;
+  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
+    WatchNode* node = watch_node_first_free_;
+    if (node != nullptr) {
+      watch_node_first_free_ = node->next_free;
+    } else {
+      if (watch_node_pools_.empty() ||
+          watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
+        watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
+        watch_node_current_pool_allocated_ = 0;
+      }
+      node =
+          &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
+    }
+    node->range = range;
+    node->range_node_next = nullptr;
+    if (node_previous != nullptr) {
+      node_previous->range_node_next = node;
+    } else {
+      range->node_first = node;
+    }
+    node_previous = node;
+    node->bucket_node_previous = nullptr;
+    node->bucket_node_next = watch_buckets_[i];
+    if (watch_buckets_[i] != nullptr) {
+      watch_buckets_[i]->bucket_node_previous = node;
+    }
+    watch_buckets_[i] = node;
+  }
+
+  return reinterpret_cast<WatchHandle>(range);
+}
+
 bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
                                 ID3D12GraphicsCommandList* command_list) {
   if (length == 0) {
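The page/bucket arithmetic above works in two steps: shift the byte address down to a page index, then shift the page's byte address down by kWatchBucketSizeLog2 (22, i.e. 4 MB buckets) to get the bucket index, so a range gets one node per 4 MB block it touches. A standalone sketch of that mapping, assuming 4 KB pages (the value of page_size_log2_ is not shown in this diff):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t page_size_log2 = 12;        // assumed 4 KB pages
  const uint32_t kWatchBucketSizeLog2 = 22;  // 4 MB buckets, from the header

  // Hypothetical watch over 8 KB straddling the first 4 MB boundary.
  uint32_t start = 0x003FF000, length = 0x2000;
  uint32_t page_first = start >> page_size_log2;                // 0x3FF
  uint32_t page_last = (start + length - 1) >> page_size_log2;  // 0x400
  // Back to a byte address, then down to a bucket - the same expression the
  // commit uses.
  uint32_t bucket_first =
      page_first << page_size_log2 >> kWatchBucketSizeLog2;  // 0
  uint32_t bucket_last =
      page_last << page_size_log2 >> kWatchBucketSizeLog2;   // 1
  printf("pages 0x%x-0x%x, buckets %u-%u\n", page_first, page_last,
         bucket_first, bucket_last);  // one node in each of two buckets
  return 0;
}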
@@ -122,7 +193,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
     return true;
   }
   start &= kAddressMask;
-  if ((kBufferSize - start) < length) {
+  if (start >= kBufferSize || (kBufferSize - start) < length) {
    // Exceeds the physical address space.
    return false;
  }
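The added start >= kBufferSize clause guards against unsigned wraparound: for a start past the end of the buffer, kBufferSize - start underflows to a huge value, so the subtraction-based check alone would accept the range. A small demonstration:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t kBufferSize = 512u << 20;  // 512 MB, as in SharedMemory
  uint32_t start = kBufferSize + 0x1000;    // hypothetical out-of-range start
  uint32_t length = 16;
  // Old check: the subtraction wraps to 0xFFFFF000, which is not < 16, so the
  // invalid range would NOT be rejected.
  printf("old check rejects: %d\n", (kBufferSize - start) < length);  // 0
  // New check: starts at or past the end of the buffer are rejected outright.
  printf("new check rejects: %d\n",
         start >= kBufferSize || (kBufferSize - start) < length);     // 1
  return 0;
}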
@@ -247,6 +318,29 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
       cpu::MMIOHandler::WatchType::kWatchWrite, false);
 }
 
+void SharedMemory::UnlinkWatchRange(WatchRange* range) {
+  uint32_t bucket =
+      range->page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  WatchNode* node = range->node_first;
+  while (node != nullptr) {
+    WatchNode* node_next = node->range_node_next;
+    if (node->bucket_node_previous != nullptr) {
+      node->bucket_node_previous->bucket_node_next = node->bucket_node_next;
+    } else {
+      watch_buckets_[bucket] = node->bucket_node_next;
+    }
+    if (node->bucket_node_next != nullptr) {
+      node->bucket_node_next->bucket_node_previous = node->bucket_node_previous;
+    }
+    node->next_free = watch_node_first_free_;
+    watch_node_first_free_ = node;
+    node = node_next;
+    ++bucket;
+  }
+  range->next_free = watch_range_first_free_;
+  watch_range_first_free_ = range;
+}
+
 void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
                                      uint32_t request_page_count) {
   upload_ranges_.clear();
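UnlinkWatchRange reverses the linking done in WatchMemoryRange: it walks the range's nodes through range_node_next, splices each out of its bucket's doubly linked list, and pushes the nodes and then the range onto the free lists. The payoff of the buckets is on the write side; below is a minimal sketch, with reduced stand-in structs rather than the real MemoryWriteCallback, of how a written address can be matched against watches by scanning a single bucket's node list:

#include <cstdint>

struct WatchRange;
struct WatchNode {
  WatchRange* range;
  WatchNode* range_node_next;
  WatchNode* bucket_node_previous;
  WatchNode* bucket_node_next;
};
struct WatchRange {
  uint32_t page_first;
  uint32_t page_last;
  WatchNode* node_first;
};

const uint32_t page_size_log2 = 12;        // assumed 4 KB pages
const uint32_t kWatchBucketSizeLog2 = 22;  // 4 MB buckets
WatchNode* watch_buckets[128] = {};        // 512 MB / 4 MB

// Returns a watch overlapping the written page, or nullptr. The real callback
// would presumably invoke range->callback and unlink the range instead.
WatchRange* FindTriggeredWatch(uint32_t address) {
  uint32_t page = address >> page_size_log2;
  // Only one bucket needs scanning: every range overlapping this 4 MB block
  // has a node linked into it.
  WatchNode* node = watch_buckets[address >> kWatchBucketSizeLog2];
  for (; node != nullptr; node = node->bucket_node_next) {
    WatchRange* range = node->range;
    if (page >= range->page_first && page <= range->page_last) {
      return range;
    }
  }
  return nullptr;
}

int main() {
  WatchNode node{};
  WatchRange range{0x3FF, 0x400, &node};  // pages 0x3FF-0x400
  node.range = &range;
  watch_buckets[0] = &node;  // the range would also need a node in bucket 1
  return FindTriggeredWatch(0x3FF800) == &range ? 0 : 1;  // page 0x3FF hits
}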
@@ -47,10 +47,16 @@ class SharedMemory {
   typedef void* WatchHandle;
   // Registers a callback invoked when something is written to the specified
   // memory range by the CPU or (if triggered explicitly - such as by a resolve)
-  // the GPU. Generally the context is the subsystem pointer (for example, the
+  // the GPU.
+  //
+  // Generally the context is the subsystem pointer (for example, the
   // texture cache), the data is the object (such as a texture), and the
   // argument is additional subsystem/object-specific data (such as whether the
   // range belongs to the base mip level or to the rest of the mips).
+  //
+  // The callback is called with the mutex locked. Do NOT watch or unwatch
+  // ranges from within it! The watch for the callback is cancelled after the
+  // callback.
   WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
                                WatchCallback callback, void* callback_context,
                                void* callback_data, uint64_t callback_argument);
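A usage sketch for the API documented above, from a hypothetical caller; the WatchCallback signature is assumed from the parameter list (context, data, argument), since its typedef is not part of this diff, and Texture/WatchTexture are illustrative names, not Xenia's:

// Hypothetical subsystem object registering a watch over its guest memory.
struct Texture {
  uint32_t guest_base;
  uint32_t guest_size;
  SharedMemory::WatchHandle watch;
  bool dirty;
};

// Assumed callback shape, mirroring callback_context/callback_data/
// callback_argument above.
void TextureWrittenCallback(void* context, void* data, uint64_t argument) {
  // Called with the mutex held; per the comment, the watch is already
  // cancelled, and re-watching must happen outside the callback.
  static_cast<Texture*>(data)->dirty = true;
}

void WatchTexture(SharedMemory* shared_memory, void* texture_cache,
                  Texture* texture) {
  texture->watch = shared_memory->WatchMemoryRange(
      texture->guest_base, texture->guest_size, TextureWrittenCallback,
      /* callback_context= */ texture_cache, /* callback_data= */ texture,
      /* callback_argument= */ 0);
}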
@@ -119,50 +125,55 @@ class SharedMemory {
   static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
   bool MemoryWriteCallback(uint32_t address);
 
+  struct WatchNode;
   // Watched range placed by other GPU subsystems.
   struct WatchRange {
+    union {
+      struct {
         WatchCallback callback;
         void* callback_context;
         void* callback_data;
         uint64_t callback_argument;
-        struct WatchNode* node_first;
+        WatchNode* node_first;
         uint32_t page_first;
         uint32_t page_last;
+      };
+      WatchRange* next_free;
+    };
   };
   // Node for faster checking of watches when pages have been written to - all
   // 512 MB are split into smaller equally sized buckets, and then ranges are
   // linearly checked.
   struct WatchNode {
+    union {
+      struct {
         WatchRange* range;
-        // Link to another node of this watched range in the next bucket.
-        WatchNode* range_node_next;
         // Links to nodes belonging to other watched ranges in the bucket.
         WatchNode* bucket_node_previous;
         WatchNode* bucket_node_next;
+        // Link to another node of this watched range in the next bucket.
+        WatchNode* range_node_next;
+      };
+      WatchNode* next_free;
+    };
   };
   static constexpr uint32_t kWatchBucketSizeLog2 = 22;
   static constexpr uint32_t kWatchBucketCount =
       1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
   WatchNode* watch_buckets_[kWatchBucketCount] = {};
-  // Allocations in pools - taking new WatchRanges and WatchNodes from the free
+  // Allocation from pools - taking new WatchRanges and WatchNodes from the free
   // list, and if there are none, creating a pool if the current one is fully
   // used, and linearly allocating from the current pool.
-  union WatchRangeAllocation {
-    WatchRange range;
-    WatchRangeAllocation* next_free;
-  };
-  union WatchNodeAllocation {
-    WatchNode node;
-    WatchNodeAllocation* next_free;
-  };
   static constexpr uint32_t kWatchRangePoolSize = 8192;
   static constexpr uint32_t kWatchNodePoolSize = 8192;
-  std::vector<WatchRangeAllocation*> watch_range_pools_;
-  std::vector<WatchNodeAllocation*> watch_node_pools_;
+  std::vector<WatchRange*> watch_range_pools_;
+  std::vector<WatchNode*> watch_node_pools_;
   uint32_t watch_range_current_pool_allocated_ = 0;
   uint32_t watch_node_current_pool_allocated_ = 0;
-  WatchRangeAllocation* watch_range_first_free = nullptr;
-  WatchNodeAllocation* watch_node_first_free = nullptr;
+  WatchRange* watch_range_first_free_ = nullptr;
+  WatchNode* watch_node_first_free_ = nullptr;
+  // Unlinks and frees the range and its nodes. Call this with the mutex locked.
+  void UnlinkWatchRange(WatchRange* range);
 
   // ***************************************************************************
   // Things above should be protected by validity_mutex_.
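The allocation scheme described in the comment above - a free list threaded through the objects themselves via the union's next_free member, backed by append-only fixed-size pools - is a common pattern; here is a minimal self-contained sketch of the same idea (Item and its payload are illustrative, not one of the commit's types):

#include <cstdint>
#include <vector>

// While an item is live, payload is in use; while it sits on the free list,
// the same storage holds the next_free link, so freeing costs no extra memory.
union Item {
  uint32_t payload;
  Item* next_free;
};

constexpr uint32_t kPoolSize = 8192;  // matches the commit's pool sizes
std::vector<Item*> pools;
uint32_t current_pool_allocated = 0;
Item* first_free = nullptr;

Item* Allocate() {
  // Reuse a freed item if one is available.
  Item* item = first_free;
  if (item != nullptr) {
    first_free = item->next_free;
    return item;
  }
  // Otherwise bump-allocate, appending a new pool when the current one fills;
  // pools are never shrunk, so live pointers stay valid.
  if (pools.empty() || current_pool_allocated >= kPoolSize) {
    pools.push_back(new Item[kPoolSize]);
    current_pool_allocated = 0;
  }
  return &pools.back()[current_pool_allocated++];
}

void Free(Item* item) {
  // Push onto the free list head, exactly like UnlinkWatchRange does.
  item->next_free = first_free;
  first_free = item;
}

int main() {
  Item* a = Allocate();
  a->payload = 42;
  Free(a);
  Item* b = Allocate();  // reuses a's storage via the free list
  return b == a ? 0 : 1;
}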