diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index eb922cfff..ee71d43d1 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -115,6 +115,77 @@ void SharedMemory::BeginFrame() {
 
 void SharedMemory::EndFrame() { upload_buffer_pool_->EndFrame(); }
 
+SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
+    uint32_t start, uint32_t length, WatchCallback callback,
+    void* callback_context, void* callback_data, uint64_t callback_argument) {
+  start &= kAddressMask;
+  if (start >= kBufferSize || length == 0) {
+    return nullptr;
+  }
+  length = std::min(length, kBufferSize - start);
+  uint32_t watch_page_first = start >> page_size_log2_;
+  uint32_t watch_page_last = (start + length - 1) >> page_size_log2_;
+  uint32_t bucket_first =
+      watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  uint32_t bucket_last =
+      watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
+
+  std::lock_guard<std::mutex> lock(validity_mutex_);
+
+  // Allocate the range.
+  WatchRange* range = watch_range_first_free_;
+  if (range != nullptr) {
+    watch_range_first_free_ = range->next_free;
+  } else {
+    if (watch_range_pools_.empty() ||
+        watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
+      watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
+      watch_range_current_pool_allocated_ = 0;
+    }
+    range =
+        &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
+  }
+  range->callback = callback;
+  range->callback_context = callback_context;
+  range->callback_data = callback_data;
+  range->callback_argument = callback_argument;
+  range->page_first = watch_page_first;
+  range->page_last = watch_page_last;
+
+  // Allocate and link the nodes.
+  WatchNode* node_previous = nullptr;
+  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
+    WatchNode* node = watch_node_first_free_;
+    if (node != nullptr) {
+      watch_node_first_free_ = node->next_free;
+    } else {
+      if (watch_node_pools_.empty() ||
+          watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
+        watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
+        watch_node_current_pool_allocated_ = 0;
+      }
+      node =
+          &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
+    }
+    node->range = range;
+    node->range_node_next = nullptr;
+    if (node_previous != nullptr) {
+      node_previous->range_node_next = node;
+    } else {
+      range->node_first = node;
+    }
+    node_previous = node;
+    node->bucket_node_previous = nullptr;
+    node->bucket_node_next = watch_buckets_[i];
+    if (watch_buckets_[i] != nullptr) {
+      watch_buckets_[i]->bucket_node_previous = node;
+    }
+    watch_buckets_[i] = node;
+  }
+
+  return reinterpret_cast<WatchHandle>(range);
+}
+
 bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
                                 ID3D12GraphicsCommandList* command_list) {
   if (length == 0) {
@@ -122,7 +193,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
     return true;
   }
   start &= kAddressMask;
-  if ((kBufferSize - start) < length) {
+  if (start >= kBufferSize || (kBufferSize - start) < length) {
     // Exceeds the physical address space.
     return false;
   }
@@ -247,6 +318,29 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
       cpu::MMIOHandler::WatchType::kWatchWrite, false);
 }
 
+void SharedMemory::UnlinkWatchRange(WatchRange* range) {
+  uint32_t bucket =
+      range->page_first << page_size_log2_ >> kWatchBucketSizeLog2;
+  WatchNode* node = range->node_first;
+  while (node != nullptr) {
+    WatchNode* node_next = node->range_node_next;
+    if (node->bucket_node_previous != nullptr) {
+      node->bucket_node_previous->bucket_node_next = node->bucket_node_next;
+    } else {
+      watch_buckets_[bucket] = node->bucket_node_next;
+    }
+    if (node->bucket_node_next != nullptr) {
+      node->bucket_node_next->bucket_node_previous = node->bucket_node_previous;
+    }
+    node->next_free = watch_node_first_free_;
+    watch_node_first_free_ = node;
+    node = node_next;
+    ++bucket;
+  }
+  range->next_free = watch_range_first_free_;
+  watch_range_first_free_ = range;
+}
+
 void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
                                      uint32_t request_page_count) {
   upload_ranges_.clear();
diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index 1c6233b19..5d775d184 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -47,10 +47,16 @@ class SharedMemory {
   typedef void* WatchHandle;
   // Registers a callback invoked when something is written to the specified
   // memory range by the CPU or (if triggered explicitly - such as by a resolve)
-  // the GPU. Generally the context is the subsystem pointer (for example, the
+  // the GPU.
+  //
+  // Generally the context is the subsystem pointer (for example, the
   // texture cache), the data is the object (such as a texture), and the
   // argument is additional subsystem/object-specific data (such as whether the
   // range belongs to the base mip level or to the rest of the mips).
+  //
+  // The callback is called with the mutex locked. Do NOT watch or unwatch
+  // ranges from within it! The watch for the callback is cancelled after the
+  // callback.
   WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
                                WatchCallback callback, void* callback_context,
                                void* callback_data, uint64_t callback_argument);
@@ -119,50 +125,55 @@ class SharedMemory {
   static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
   bool MemoryWriteCallback(uint32_t address);
 
+  struct WatchNode;
   // Watched range placed by other GPU subsystems.
   struct WatchRange {
-    WatchCallback callback;
-    void* callback_context;
-    void* callback_data;
-    uint64_t callback_argument;
-    struct WatchNode* node_first;
-    uint32_t page_first;
-    uint32_t page_last;
+    union {
+      struct {
+        WatchCallback callback;
+        void* callback_context;
+        void* callback_data;
+        uint64_t callback_argument;
+        WatchNode* node_first;
+        uint32_t page_first;
+        uint32_t page_last;
+      };
+      WatchRange* next_free;
+    };
   };
   // Node for faster checking of watches when pages have been written to - all
   // 512 MB are split into smaller equally sized buckets, and then ranges are
   // linearly checked.
   struct WatchNode {
-    WatchRange* range;
-    // Links to nodes belonging to other watched ranges in the bucket.
-    WatchNode* bucket_node_previous;
-    WatchNode* bucket_node_next;
-    // Link to another node of this watched range in the next bucket.
-    WatchNode* range_node_next;
+    union {
+      struct {
+        WatchRange* range;
+        // Link to another node of this watched range in the next bucket.
+        WatchNode* range_node_next;
+        // Links to nodes belonging to other watched ranges in the bucket.
+        WatchNode* bucket_node_previous;
+        WatchNode* bucket_node_next;
+      };
+      WatchNode* next_free;
+    };
   };
   static constexpr uint32_t kWatchBucketSizeLog2 = 22;
   static constexpr uint32_t kWatchBucketCount =
       1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
   WatchNode* watch_buckets_[kWatchBucketCount] = {};
-  // Allocations in pools - taking new WatchRanges and WatchNodes from the free
+  // Allocation from pools - taking new WatchRanges and WatchNodes from the free
   // list, and if there are none, creating a pool if the current one is fully
   // used, and linearly allocating from the current pool.
-  union WatchRangeAllocation {
-    WatchRange range;
-    WatchRangeAllocation* next_free;
-  };
-  union WatchNodeAllocation {
-    WatchNode node;
-    WatchNodeAllocation* next_free;
-  };
   static constexpr uint32_t kWatchRangePoolSize = 8192;
   static constexpr uint32_t kWatchNodePoolSize = 8192;
-  std::vector<WatchRangeAllocation*> watch_range_pools_;
-  std::vector<WatchNodeAllocation*> watch_node_pools_;
+  std::vector<WatchRange*> watch_range_pools_;
+  std::vector<WatchNode*> watch_node_pools_;
   uint32_t watch_range_current_pool_allocated_ = 0;
   uint32_t watch_node_current_pool_allocated_ = 0;
-  WatchRangeAllocation* watch_range_first_free = nullptr;
-  WatchNodeAllocation* watch_node_first_free = nullptr;
+  WatchRange* watch_range_first_free_ = nullptr;
+  WatchNode* watch_node_first_free_ = nullptr;
+  // Unlinks and frees the range and its nodes. Call this with the mutex locked.
+  void UnlinkWatchRange(WatchRange* range);
 
   // ***************************************************************************
   // Things above should be protected by validity_mutex_.
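
Usage note (not part of the patch): the sketch below shows how a subsystem
would be expected to consume the new API. TextureCache and Texture are
hypothetical stand-ins, the xe::gpu::d3d12 namespace and include path are
assumed from the file paths above, and the callback signature
void(void* context, void* data, uint64_t argument) is assumed from the
WatchMemoryRange parameter list; none of this is taken verbatim from the
patch. It follows the contract documented in the header: the callback runs
with the mutex locked and the watch is cancelled after the callback, so the
callback only flags the object, and a new watch is registered later from
outside the callback.

#include <cstdint>

#include "xenia/gpu/d3d12/shared_memory.h"

// Hypothetical object tracked by a subsystem such as the texture cache.
struct Texture {
  uint32_t guest_address;     // Watched physical address, in bytes.
  uint32_t guest_size;        // Watched length, in bytes.
  bool pending_invalidation;  // Set by the watch callback.
};

class TextureCache {
 public:
  explicit TextureCache(xe::gpu::d3d12::SharedMemory* shared_memory)
      : shared_memory_(shared_memory) {}

  void WatchTexture(Texture* texture) {
    // context = subsystem, data = object, argument = object-specific detail
    // (unused here) - the convention documented in the header. The returned
    // WatchHandle is discarded here; it would be kept if the texture ever
    // needed to be unwatched explicitly.
    shared_memory_->WatchMemoryRange(texture->guest_address,
                                     texture->guest_size, WatchCallbackThunk,
                                     this, texture, 0);
  }

 private:
  static void WatchCallbackThunk(void* context, void* data,
                                 uint64_t argument) {
    // Runs with validity_mutex_ locked, and this watch is already cancelled:
    // only flag the object here, and let WatchTexture be called again later
    // (for example, when the texture is next bound), outside the callback.
    static_cast<Texture*>(data)->pending_invalidation = true;
  }

  xe::gpu::d3d12::SharedMemory* shared_memory_;
};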