diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 3a5da1990..eb922cfff 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -30,7 +30,7 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context) assert_true(page_bitmap_length != 0); valid_pages_.resize(page_bitmap_length); - watched_pages_.resize(page_bitmap_length); + protected_pages_.resize(page_bitmap_length); } SharedMemory::~SharedMemory() { Shutdown(); } @@ -76,13 +76,13 @@ bool SharedMemory::Initialize() { std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t)); - std::memset(watched_pages_.data(), 0, - watched_pages_.size() * sizeof(uint64_t)); + std::memset(protected_pages_.data(), 0, + protected_pages_.size() * sizeof(uint64_t)); upload_buffer_pool_ = std::make_unique(context_, 4 * 1024 * 1024); - memory_->SetGlobalPhysicalAccessWatch(WatchCallbackThunk, this); + memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this); return true; } @@ -179,7 +179,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length, } } - // Upload and watch used ranges. + // Upload and protect used ranges. 
GetRangesToUpload(start >> page_size_log2_, ((start & ((1 << page_size_log2_) - 1)) + length + ((1 << page_size_log2_) - 1)) >> @@ -239,7 +239,7 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first, valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1; } valid_pages_[i] |= valid_bits; - watched_pages_[i] |= valid_bits; + protected_pages_[i] |= valid_bits; } memory_->ProtectPhysicalMemory( @@ -309,27 +309,29 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first, } } -bool SharedMemory::WatchCallbackThunk(void* context_ptr, uint32_t address) { - return reinterpret_cast(context_ptr)->WatchCallback(address); +bool SharedMemory::MemoryWriteCallbackThunk(void* context_ptr, + uint32_t address) { + SharedMemory* shared_memory = reinterpret_cast(context_ptr); + return shared_memory->MemoryWriteCallback(address); } -bool SharedMemory::WatchCallback(uint32_t address) { +bool SharedMemory::MemoryWriteCallback(uint32_t address) { uint32_t page_index = (address & kAddressMask) >> page_size_log2_; uint32_t block_index = page_index >> 6; uint64_t page_bit = 1ull << (page_index & 63); std::lock_guard lock(validity_mutex_); - if (!(watched_pages_[block_index] & page_bit)) { + if (!(protected_pages_[block_index] & page_bit)) { return false; } valid_pages_[block_index] &= ~page_bit; - // TODO(Triang3l): Invoke texture invalidation callbacks. + // TODO(Triang3l): Invoke watch callbacks. memory_->UnprotectPhysicalMemory(page_index << page_size_log2_, 1 << page_size_log2_, false); - watched_pages_[block_index] &= ~page_bit; + protected_pages_[block_index] &= ~page_bit; return true; } diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index 6fd0574dd..1c6233b19 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -43,6 +43,18 @@ class SharedMemory { // The draw command list is needed for the transition. 
void EndFrame(); + typedef void (*WatchCallback)(void* context, void* data, uint64_t argument); + typedef void* WatchHandle; + // Registers a callback invoked when something is written to the specified + // memory range by the CPU or (if triggered explicitly - such as by a resolve) + // the GPU. Generally the context is the subsystem pointer (for example, the + // texture cache), the data is the object (such as a texture), and the + // argument is additional subsystem/object-specific data (such as whether the + // range belongs to the base mip level or to the rest of the mips). + WatchHandle WatchMemoryRange(uint32_t start, uint32_t length, + WatchCallback callback, void* callback_context, + void* callback_data, uint64_t callback_argument); + // Checks if the range has been updated, uploads new data if needed and // ensures the buffer tiles backing the range are resident. May transition the // tiled buffer to copy destination - call this before UseForReading or @@ -91,11 +103,71 @@ class SharedMemory { // Mutex between the exception handler and the command processor, to be locked // when checking or updating validity of pages/ranges. std::mutex validity_mutex_; + + // *************************************************************************** + // Things below should be protected by validity_mutex_. + // *************************************************************************** + // Bit vector containing whether physical memory system pages are up to date. std::vector valid_pages_; - // Mark the memory range as updated and watch it. + // Mark the memory range as updated and protect it. void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count); + // Whether each physical page is protected by the GPU code (after uploading). + std::vector protected_pages_; + // Memory access callback. 
+  // Static trampoline with the (context, address) callback signature: casts
+  // context_ptr back to the SharedMemory and forwards to MemoryWriteCallback.
+  static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address);
+  // Handles a write to the page containing the address. Returns false if the
+  // page is not protected by the shared memory; otherwise marks the page as
+  // no longer valid, removes the protection and returns true.
+  bool MemoryWriteCallback(uint32_t address);
+
+  // Declared before WatchRange so the node pointer inside it refers to this
+  // nested type. Without a prior declaration, `struct WatchNode*` within
+  // WatchRange would introduce an unrelated, never completed WatchNode in the
+  // enclosing namespace rather than naming SharedMemory::WatchNode.
+  struct WatchNode;
+  // Watched range placed by other GPU subsystems.
+  struct WatchRange {
+    // Callback and the values passed to it (see WatchMemoryRange).
+    WatchCallback callback;
+    void* callback_context;
+    void* callback_data;
+    uint64_t callback_argument;
+    // First node of this range - further nodes of the same range are reached
+    // through WatchNode::range_node_next.
+    WatchNode* node_first;
+    // NOTE(review): presumably the inclusive page bounds of the range - confirm
+    // against the WatchMemoryRange implementation.
+    uint32_t page_first;
+    uint32_t page_last;
+  };
+  // Node for faster checking of watches when pages have been written to - all
+  // 512 MB are split into smaller equally sized buckets, and then ranges are
+  // linearly checked.
+  struct WatchNode {
+    WatchRange* range;
+    // Links to nodes belonging to other watched ranges in the bucket.
+    WatchNode* bucket_node_previous;
+    WatchNode* bucket_node_next;
+    // Link to another node of this watched range in the next bucket.
+    WatchNode* range_node_next;
+  };
+  // Log2 of the bucket size; the bucket count is the buffer size divided by the
+  // bucket size.
+  static constexpr uint32_t kWatchBucketSizeLog2 = 22;
+  static constexpr uint32_t kWatchBucketCount =
+      1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
+  // Head node of each bucket, null when the bucket has no nodes.
+  WatchNode* watch_buckets_[kWatchBucketCount] = {};
+  // Allocations in pools - taking new WatchRanges and WatchNodes from the free
+  // list, and if there are none, creating a pool if the current one is fully
+  // used, and linearly allocating from the current pool.
+  // Each allocation slot is either a live object or, while unused, a link in
+  // the free list - hence the unions.
+  union WatchRangeAllocation {
+    WatchRange range;
+    WatchRangeAllocation* next_free;
+  };
+  union WatchNodeAllocation {
+    WatchNode node;
+    WatchNodeAllocation* next_free;
+  };
+  // NOTE(review): presumably the number of allocations in one pool - confirm.
+  static constexpr uint32_t kWatchRangePoolSize = 8192;
+  static constexpr uint32_t kWatchNodePoolSize = 8192;
+  // NOTE(review): element types assumed to be pointers to each pool's array of
+  // allocations - confirm against the code that creates the pools.
+  std::vector<WatchRangeAllocation*> watch_range_pools_;
+  std::vector<WatchNodeAllocation*> watch_node_pools_;
+  uint32_t watch_range_current_pool_allocated_ = 0;
+  uint32_t watch_node_current_pool_allocated_ = 0;
+  // Heads of the free lists.
+  // NOTE(review): unlike the other data members, these two have no trailing
+  // underscore; the names are kept since code elsewhere may reference them.
+  WatchRangeAllocation* watch_range_first_free = nullptr;
+  WatchNodeAllocation* watch_node_first_free = nullptr;
+
+  // ***************************************************************************
+  // Things above should be protected by validity_mutex_.
+ // *************************************************************************** + // First page and length in pages. typedef std::pair UploadRange; // Ranges that need to be uploaded, generated by GetRangesToUpload (a @@ -105,13 +177,6 @@ class SharedMemory { uint32_t request_page_count); std::unique_ptr upload_buffer_pool_ = nullptr; - // Whether each physical page is watched by the GPU (after uploading). - // Once a watch is triggered, it's not watched anymore. - std::vector watched_pages_; - // Memory access callback. - static bool WatchCallbackThunk(void* context_ptr, uint32_t address); - bool WatchCallback(uint32_t address); - void TransitionBuffer(D3D12_RESOURCE_STATES new_state, ID3D12GraphicsCommandList* command_list); };