diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index ee71d43d1..f6f84e457 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -130,7 +130,7 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
   uint32_t bucket_last =
       watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
 
-  std::lock_guard<std::mutex> lock(validity_mutex_);
+  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
 
   // Allocate the range.
   WatchRange* range = watch_range_first_free_;
@@ -141,9 +141,8 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
         watch_range_current_pool_allocated_ >= kWatchRangePoolSize) {
       watch_range_pools_.push_back(new WatchRange[kWatchRangePoolSize]);
       watch_range_current_pool_allocated_ = 0;
-      range =
-          &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
     }
+    range = &(watch_range_pools_.back()[watch_range_current_pool_allocated_++]);
   }
   range->callback = callback;
   range->callback_context = callback_context;
@@ -163,9 +162,8 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
         watch_node_current_pool_allocated_ >= kWatchNodePoolSize) {
       watch_node_pools_.push_back(new WatchNode[kWatchNodePoolSize]);
       watch_node_current_pool_allocated_ = 0;
-      node =
-          &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
     }
+    node = &(watch_node_pools_.back()[watch_node_current_pool_allocated_++]);
   }
   node->range = range;
   node->range_node_next = nullptr;
@@ -186,6 +184,15 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
   return reinterpret_cast<WatchHandle>(range);
 }
 
+void SharedMemory::UnwatchMemoryRange(WatchHandle handle) {
+  if (handle == nullptr) {
+    // Could be a zero length range.
+    return;
+  }
+  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
+  UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle));
+}
+
 bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
                                 ID3D12GraphicsCommandList* command_list) {
   if (length == 0) {
@@ -299,7 +306,7 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
   uint32_t valid_block_first = valid_page_first >> 6;
   uint32_t valid_block_last = valid_page_last >> 6;
 
-  std::lock_guard<std::mutex> lock(validity_mutex_);
+  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
 
   for (uint32_t i = valid_block_first; i <= valid_block_last; ++i) {
     uint64_t valid_bits = UINT64_MAX;
@@ -353,7 +360,7 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
   uint32_t request_block_first = request_page_first >> 6;
   uint32_t request_block_last = request_page_last >> 6;
 
-  std::lock_guard<std::mutex> lock(validity_mutex_);
+  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
 
   uint32_t range_start = UINT32_MAX;
   for (uint32_t i = request_block_first; i <= request_block_last; ++i) {
@@ -414,14 +421,28 @@ bool SharedMemory::MemoryWriteCallback(uint32_t address) {
   uint32_t block_index = page_index >> 6;
   uint64_t page_bit = 1ull << (page_index & 63);
 
-  std::lock_guard<std::mutex> lock(validity_mutex_);
+  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
 
   if (!(protected_pages_[block_index] & page_bit)) {
     return false;
   }
 
   valid_pages_[block_index] &= ~page_bit;
-  // TODO(Triang3l): Invoke watch callbacks.
+
+  // Trigger watch callbacks.
+  WatchNode* node =
+      watch_buckets_[page_index << page_size_log2_ >> kWatchBucketSizeLog2];
+  while (node != nullptr) {
+    WatchRange* range = node->range;
+    // Store the next node now since when the callback is triggered, the links
+    // will be broken.
+    node = node->bucket_node_next;
+    if (page_index >= range->page_first && page_index <= range->page_last) {
+      range->callback(range->callback_context, range->callback_data,
+                      range->callback_argument);
+      UnlinkWatchRange(range);
+    }
+  }
 
   memory_->UnprotectPhysicalMemory(page_index << page_size_log2_,
                                    1 << page_size_log2_, false);
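The loop added to MemoryWriteCallback iterates a bucket's linked list while the callbacks it fires unlink nodes from that same list, which is why bucket_node_next is saved before the callback runs. A minimal standalone sketch of that save-the-successor-first pattern; WatchEntry and FirePage are illustrative names, not the Xenia types, and the unlinking that UnlinkWatchRange does in the real code is inlined here:

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for a bucket's singly linked watch list.
struct WatchEntry {
  uint32_t page_first;
  uint32_t page_last;
  void (*callback)(void* context);
  void* context;
  WatchEntry* next;
};

// Fires every entry covering 'page' and unlinks fired entries. The successor
// is saved before the callback because firing an entry breaks its links.
void FirePage(WatchEntry** list_head, uint32_t page) {
  WatchEntry** link = list_head;
  while (*link != nullptr) {
    WatchEntry* entry = *link;
    WatchEntry* next = entry->next;  // Save before the links are broken.
    if (page >= entry->page_first && page <= entry->page_last) {
      entry->callback(entry->context);
      *link = next;  // Unlink the fired entry; one-shot like the real watches.
    } else {
      link = &entry->next;
    }
  }
}

int main() {
  WatchEntry b = {8, 15, [](void*) { std::puts("b fired"); }, nullptr, nullptr};
  WatchEntry a = {0, 7, [](void*) { std::puts("a fired"); }, nullptr, &b};
  WatchEntry* head = &a;
  FirePage(&head, 4);  // Fires and unlinks 'a'.
  FirePage(&head, 4);  // Nothing left covering page 4.
  return 0;
}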
diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index 5d775d184..d49999004 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -56,10 +56,16 @@ class SharedMemory {
   //
   // The callback is called with the mutex locked. Do NOT watch or unwatch
   // ranges from within it! The watch for the callback is cancelled after the
-  // callback.
+  // callback - the handle becomes invalid.
   WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
                               WatchCallback callback, void* callback_context,
                               void* callback_data, uint64_t callback_argument);
+  // Unregisters a previously registered watched memory range.
+  void UnwatchMemoryRange(WatchHandle handle);
+  // Locks the mutex that gets locked when watch callbacks are invoked - must be
+  // done when checking variables that may be changed by a watch callback.
+  inline void LockWatchMutex() { validity_mutex_.lock(); }
+  inline void UnlockWatchMutex() { validity_mutex_.unlock(); }
 
   // Checks if the range has been updated, uploads new data if needed and
   // ensures the buffer tiles backing the range are resident. May transition the
@@ -108,7 +114,7 @@ class SharedMemory {
 
   // Mutex between the exception handler and the command processor, to be locked
   // when checking or updating validity of pages/ranges.
-  std::mutex validity_mutex_;
+  std::recursive_mutex validity_mutex_;
 
   // ***************************************************************************
   // Things below should be protected by validity_mutex_.
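LockWatchMutex()/UnlockWatchMutex() expose raw lock/unlock, so every caller must bracket them manually, as LoadTextureData does below. A hypothetical RAII guard that a caller could layer on top - a sketch assuming only the SharedMemory declaration above, not part of this change:

#include "xenia/gpu/d3d12/shared_memory.h"

namespace xe {
namespace gpu {
namespace d3d12 {

// Hypothetical scope guard over the raw lock/unlock pair; unlocks even if the
// guarded code returns early.
class WatchMutexLock {
 public:
  explicit WatchMutexLock(SharedMemory* shared_memory)
      : shared_memory_(shared_memory) {
    shared_memory_->LockWatchMutex();
  }
  ~WatchMutexLock() { shared_memory_->UnlockWatchMutex(); }
  WatchMutexLock(const WatchMutexLock&) = delete;
  WatchMutexLock& operator=(const WatchMutexLock&) = delete;

 private:
  SharedMemory* shared_memory_;
};

}  // namespace d3d12
}  // namespace gpu
}  // namespace xe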
diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc
index 4ab1c60a5..b2d0e96e5 100644
--- a/src/xenia/gpu/d3d12/texture_cache.cc
+++ b/src/xenia/gpu/d3d12/texture_cache.cc
@@ -701,6 +701,8 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
     texture->base_size *= key.depth;
     texture->mip_size *= key.depth;
   }
+  texture->base_watch_handle = nullptr;
+  texture->mip_watch_handle = nullptr;
   textures_.insert(std::make_pair(map_key, texture));
   LogTextureAction(texture, "Created");
 
@@ -708,7 +710,12 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
 }
 
 bool TextureCache::LoadTextureData(Texture* texture) {
-  if (texture->base_in_sync && texture->mips_in_sync) {
+  // See what we need to upload.
+  shared_memory_->LockWatchMutex();
+  bool base_in_sync = texture->base_in_sync;
+  bool mips_in_sync = texture->mips_in_sync;
+  shared_memory_->UnlockWatchMutex();
+  if (base_in_sync && mips_in_sync) {
     return true;
   }
 
@@ -731,13 +738,13 @@ bool TextureCache::LoadTextureData(Texture* texture) {
   }
 
   // Request uploading of the texture data to the shared memory.
-  if (!texture->base_in_sync) {
+  if (!base_in_sync) {
     if (!shared_memory_->RequestRange(texture->key.base_page << 12,
                                       texture->base_size, command_list)) {
       return false;
     }
   }
-  if (!texture->mips_in_sync) {
+  if (!mips_in_sync) {
     if (!shared_memory_->RequestRange(texture->key.mip_page << 12,
                                       texture->mip_size, command_list)) {
       return false;
@@ -811,8 +818,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
     command_list->ResourceBarrier(1, barriers);
     texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
   }
-  uint32_t mip_first = texture->base_in_sync ? 1 : 0;
-  uint32_t mip_last = texture->mips_in_sync ? 0 : resource_desc.MipLevels - 1;
+  uint32_t mip_first = base_in_sync ? 1 : 0;
+  uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1;
   auto cbuffer_pool = command_processor_->GetConstantBufferPool();
   CopyConstants copy_constants;
   copy_constants.is_3d = is_3d ? 1 : 0;
@@ -903,13 +910,46 @@ bool TextureCache::LoadTextureData(Texture* texture) {
   command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
 
+  // Mark the ranges as uploaded and watch them.
+  shared_memory_->LockWatchMutex();
   texture->base_in_sync = true;
   texture->mips_in_sync = true;
+  if (!base_in_sync) {
+    texture->base_watch_handle = shared_memory_->WatchMemoryRange(
+        texture->key.base_page << 12, texture->base_size, WatchCallbackThunk,
+        this, texture, 0);
+  }
+  if (!mips_in_sync) {
+    texture->mip_watch_handle = shared_memory_->WatchMemoryRange(
+        texture->key.mip_page << 12, texture->mip_size, WatchCallbackThunk,
+        this, texture, 1);
+  }
+  shared_memory_->UnlockWatchMutex();
 
   LogTextureAction(texture, "Loaded");
 
   return true;
 }
 
+void TextureCache::WatchCallbackThunk(void* context, void* data,
+                                      uint64_t argument) {
+  TextureCache* texture_cache = reinterpret_cast<TextureCache*>(context);
+  texture_cache->WatchCallback(reinterpret_cast<Texture*>(data), argument != 0);
+}
+
+void TextureCache::WatchCallback(Texture* texture, bool is_mip) {
+  // Mutex already locked here.
+  if (is_mip) {
+    texture->mips_in_sync = false;
+    texture->mip_watch_handle = nullptr;
+  } else {
+    texture->base_in_sync = false;
+    texture->base_watch_handle = nullptr;
+  }
+  XELOGE("Texture %s at %.8X invalidated", is_mip ? "mips" : "base",
+         (is_mip ? texture->key.mip_page : texture->key.base_page) << 12);
+  // TODO(Triang3l): Notify bindings that ranges should be requested again.
+}
+
 void TextureCache::ClearBindings() {
   std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
   texture_keys_in_sync_ = 0;
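The watch lifecycle implemented above is one-shot: LoadTextureData uploads and registers a watch, the first CPU write fires WatchCallback under the watch mutex (with the handle already cancelled), and the next LoadTextureData re-uploads and re-registers. A condensed sketch of that cycle against the SharedMemory API; Resource, OnInvalidated, and UploadAndWatch are illustrative stand-ins for Texture and the TextureCache methods:

#include <cstdint>

#include "xenia/gpu/d3d12/shared_memory.h"

namespace xe {
namespace gpu {
namespace d3d12 {

// Illustrative per-resource bookkeeping, standing in for Texture.
struct Resource {
  bool in_sync = false;
  SharedMemory::WatchHandle watch_handle = nullptr;
};

// Watch callback: runs with the watch mutex held, and the watch is already
// cancelled here, so only the bookkeeping is cleared - no UnwatchMemoryRange.
void OnInvalidated(void* /* context */, void* data, uint64_t /* argument */) {
  Resource* resource = reinterpret_cast<Resource*>(data);
  resource->in_sync = false;
  resource->watch_handle = nullptr;
}

void UploadAndWatch(SharedMemory* shared_memory, Resource* resource,
                    uint32_t start, uint32_t length) {
  // ...upload the data for [start, start + length) here...
  // The mutex is recursive, so holding it across WatchMemoryRange is fine.
  shared_memory->LockWatchMutex();
  resource->in_sync = true;
  resource->watch_handle = shared_memory->WatchMemoryRange(
      start, length, OnInvalidated, /* callback_context */ nullptr, resource,
      0);
  shared_memory->UnlockWatchMutex();
}

}  // namespace d3d12
}  // namespace gpu
}  // namespace xe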
diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h
index 4eca804d0..b9f35f19b 100644
--- a/src/xenia/gpu/d3d12/texture_cache.h
+++ b/src/xenia/gpu/d3d12/texture_cache.h
@@ -181,9 +181,16 @@ class TextureCache {
     uint32_t mip_offsets[14];
     // Byte pitches of each mipmap within one slice (for linear layout mainly).
     uint32_t mip_pitches[14];
-    // Whether the recent base level data has been loaded from the memory.
+
+    // Watch handles for the memory ranges (protected by the shared memory watch
+    // mutex).
+    SharedMemory::WatchHandle base_watch_handle;
+    SharedMemory::WatchHandle mip_watch_handle;
+    // Whether the recent base level data has been loaded from the memory
+    // (protected by the shared memory watch mutex).
     bool base_in_sync;
-    // Whether the recent mip data has been loaded from the memory.
+    // Whether the recent mip data has been loaded from the memory (protected
+    // by the shared memory watch mutex).
     bool mips_in_sync;
   };
 
@@ -234,6 +241,10 @@ class TextureCache {
   // allocates descriptors and copies!
   bool LoadTextureData(Texture* texture);
 
+  // Shared memory callback for texture data invalidation.
+  static void WatchCallbackThunk(void* context, void* data, uint64_t argument);
+  void WatchCallback(Texture* texture, bool is_mip);
+
   // Makes all bindings invalid. Also requesting textures after calling this
   // will cause another attempt to create a texture or to untile it if there was
   // an error.
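WatchCallbackThunk above is the usual static-thunk idiom for routing a C-style callback into a member function: the context pointer carries the instance and the extra argument tags the registration. A generic, self-contained sketch of the idiom; all names here are illustrative, not Xenia code:

#include <cstdint>
#include <cstdio>

// C-style callback type matching the shape of SharedMemory::WatchCallback.
typedef void (*Callback)(void* context, void* data, uint64_t argument);

class Cache {
 public:
  // Static thunk: 'context' smuggles the instance pointer, 'data' the
  // per-registration payload, 'argument' a small tag (0 = base, 1 = mips).
  static void Thunk(void* context, void* data, uint64_t argument) {
    reinterpret_cast<Cache*>(context)->OnInvalidated(data, argument != 0);
  }

 private:
  void OnInvalidated(void* /* data */, bool is_mip) {
    std::printf("invalidated: %s\n", is_mip ? "mips" : "base");
  }
};

// A registrar that only accepts plain function pointers can now reach the
// instance through the context argument.
void Register(Callback callback, void* context, void* data,
              uint64_t argument) {
  callback(context, data, argument);  // Invoke immediately for the demo.
}

int main() {
  Cache cache;
  Register(Cache::Thunk, &cache, nullptr, 1);  // Prints "invalidated: mips".
  return 0;
}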