From 6e36101b42dafb83505c590bd92a0790ffd4d56c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 24 Sep 2018 23:18:16 +0300 Subject: [PATCH] [D3D12] Experimental write watch implementation for shared memory --- src/xenia/cpu/mmio_handler.cc | 266 +++++++++++++++++------ src/xenia/cpu/mmio_handler.h | 90 ++++++-- src/xenia/gpu/d3d12/shared_memory.cc | 121 +++++------ src/xenia/gpu/d3d12/shared_memory.h | 18 +- src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc | 3 +- src/xenia/memory.cc | 77 +++++-- src/xenia/memory.h | 36 +-- 7 files changed, 424 insertions(+), 187 deletions(-) diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index ba03e1c9e..f9d40639d 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -9,6 +9,9 @@ #include "xenia/cpu/mmio_handler.h" +#include +#include + #include "xenia/base/assert.h" #include "xenia/base/byte_order.h" #include "xenia/base/exception_handler.h" @@ -40,6 +43,20 @@ std::unique_ptr MMIOHandler::Install(uint8_t* virtual_membase, return handler; } +MMIOHandler::MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase, + uint8_t* membase_end) + : virtual_membase_(virtual_membase), + physical_membase_(physical_membase), + memory_end_(membase_end) { + system_page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size())); + + uint32_t physical_page_count = (512 * 1024 * 1024) >> system_page_size_log2_; + physical_write_watched_pages_.resize(physical_page_count >> 4); + assert_true(physical_write_watched_pages_.size() != 0); + std::memset(physical_write_watched_pages_.data(), 0, + physical_write_watched_pages_.size() * sizeof(uint64_t)); +} + MMIOHandler::~MMIOHandler() { ExceptionHandler::Uninstall(ExceptionCallbackThunk, this); @@ -214,72 +231,154 @@ void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) { delete entry; } -void MMIOHandler::SetGlobalPhysicalAccessWatch( - GlobalAccessWatchCallback callback, void* callback_context) { +void* MMIOHandler::RegisterPhysicalWriteWatch( + PhysicalWriteWatchCallback callback, void* callback_context) { + PhysicalWriteWatchEntry* entry = new PhysicalWriteWatchEntry; + entry->callback = callback; + entry->callback_context = callback_context; + auto lock = global_critical_region_.Acquire(); - global_physical_watch_callback_ = callback; - global_physical_watch_callback_context_ = callback_context; + physical_write_watches_.push_back(entry); + + return entry; } -void MMIOHandler::ProtectPhysicalMemory(uint32_t physical_address, - uint32_t length, WatchType type, - bool protect_host_access) { - uint32_t base_address = physical_address & 0x1FFFFFFF; - - // Can only protect sizes matching system page size. - // This means we need to round up, which will cause spurious access - // violations and invalidations. - // TODO(benvanik): only invalidate if actually within the region? - length = - xe::round_up(length + (base_address % uint32_t(xe::memory::page_size())), - uint32_t(xe::memory::page_size())); - base_address = base_address - (base_address % xe::memory::page_size()); - - auto page_access = memory::PageAccess::kNoAccess; - switch (type) { - case kWatchInvalid: - page_access = memory::PageAccess::kReadWrite; - break; - case kWatchWrite: - page_access = memory::PageAccess::kReadOnly; - break; - case kWatchReadWrite: - page_access = memory::PageAccess::kNoAccess; - break; - default: - assert_unhandled_case(type); - break; - } - - // Protect the range under all address spaces. 
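As an aside, a minimal standalone sketch (not part of the patch) of the sizing math the new constructor above performs, assuming 4 KiB system pages: 512 MB of physical memory becomes 131072 pages, and with 4 status bits per page (16 pages per 64-bit word) the watch table is 8192 words, i.e. 64 KiB.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Assumed host page size; the real value comes from xe::memory::page_size().
      const uint32_t page_size_log2 = 12;  // 4 KiB
      const uint32_t physical_page_count = (512 * 1024 * 1024) >> page_size_log2;
      // 4 bits of state per page -> 16 pages per uint64_t.
      std::vector<uint64_t> watched_pages(physical_page_count >> 4);
      std::printf("%u pages, %zu words, %zu bytes\n", physical_page_count,
                  watched_pages.size(), watched_pages.size() * sizeof(uint64_t));
      return 0;
    }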
- if (protect_host_access) { - memory::Protect(physical_membase_ + base_address, length, page_access, - nullptr); - } - memory::Protect(virtual_membase_ + 0xA0000000 + base_address, length, - page_access, nullptr); - memory::Protect(virtual_membase_ + 0xC0000000 + base_address, length, - page_access, nullptr); - memory::Protect(virtual_membase_ + 0xE0000000 + base_address, length, - page_access, nullptr); -} - -void MMIOHandler::UnprotectPhysicalMemory(uint32_t physical_address, - uint32_t length, - bool unprotect_host_access) { - ProtectPhysicalMemory(physical_address, length, kWatchInvalid, - unprotect_host_access); -} - -void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { +void MMIOHandler::UnregisterPhysicalWriteWatch(void* watch_handle) { + auto entry = reinterpret_cast(watch_handle); auto lock = global_critical_region_.Acquire(); + auto it = std::find(physical_write_watches_.begin(), + physical_write_watches_.end(), entry); + assert_false(it == physical_write_watches_.end()); + if (it != physical_write_watches_.end()) { + physical_write_watches_.erase(it); + } + + delete entry; +} + +void MMIOHandler::ProtectAndWatchPhysicalMemory( + uint32_t physical_address_and_heap, uint32_t length) { + // Bits to set in 16-bit blocks to mark that the pages are protected. + uint64_t block_heap_mask; + if (physical_address_and_heap >= 0xE0000000) { + block_heap_mask = 0x4444444444444444ull; + } else if (physical_address_and_heap >= 0xC0000000) { + block_heap_mask = 0x2222222222222222ull; + } else if (physical_address_and_heap >= 0xA0000000) { + block_heap_mask = 0x1111111111111111ull; + } else { + assert_always(); + return; + } + + uint32_t heap_relative_address = physical_address_and_heap & 0x1FFFFFFF; + length = std::min(length, 0x20000000u - heap_relative_address); + if (length == 0) { + return; + } + + uint32_t page_first = heap_relative_address >> system_page_size_log2_; + uint32_t page_last = + (heap_relative_address + length - 1) >> system_page_size_log2_; + uint32_t block_first = page_first >> 4; + uint32_t block_last = page_last >> 4; + + auto lock = global_critical_region_.Acquire(); + + // Set the bits indicating that the pages are watched and access violations + // there are intentional. + for (uint32_t i = block_first; i <= block_last; ++i) { + uint64_t block_set_bits = block_heap_mask; + if (i == block_first) { + block_set_bits &= ~((1ull << ((page_first & 15) * 4)) - 1); + } + if (i == block_last && (page_last & 15) != 15) { + block_set_bits &= (1ull << (((page_last & 15) + 1) * 4)) - 1; + } + physical_write_watched_pages_[i] |= block_set_bits; + } + + // Protect only in one range (due to difficulties synchronizing protection + // levels between those ranges). + memory::Protect(virtual_membase_ + (physical_address_and_heap & ~0x1FFFFFFF) + + (page_first << system_page_size_log2_), + (page_last - page_first + 1) << system_page_size_log2_, + memory::PageAccess::kReadOnly, nullptr); +} + +void MMIOHandler::InvalidateRange(uint32_t physical_address_and_heap, + uint32_t length, bool unprotect) { + uint32_t heap_relative_address = physical_address_and_heap & 0x1FFFFFFF; + length = std::min(length, 0x20000000u - heap_relative_address); + if (length == 0) { + return; + } + + auto lock = global_critical_region_.Acquire(); + + // Trigger the new (per-page) watches and unwatch the pages. 
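The nibble masks above (0x1111..., 0x2222..., 0x4444...) pack one bit per heap for 16 pages into each 64-bit word. A small sketch of the first/last-block masking used by ProtectAndWatchPhysicalMemory, with hypothetical names, assuming the same layout:

    #include <cstdint>
    #include <vector>

    // Marks pages [page_first, page_last] as watched for one heap in a table
    // that stores 4 bits per page (16 pages per word).
    void MarkWatched(std::vector<uint64_t>& table, uint64_t heap_nibble_mask,
                     uint32_t page_first, uint32_t page_last) {
      uint32_t block_first = page_first >> 4;
      uint32_t block_last = page_last >> 4;
      for (uint32_t i = block_first; i <= block_last; ++i) {
        uint64_t bits = heap_nibble_mask;
        if (i == block_first) {
          // Drop the nibbles of pages preceding the first page in this word.
          bits &= ~((1ull << ((page_first & 15) * 4)) - 1);
        }
        if (i == block_last && (page_last & 15) != 15) {
          // Drop the nibbles of pages following the last page in this word.
          bits &= (1ull << (((page_last & 15) + 1) * 4)) - 1;
        }
        table[i] |= bits;
      }
    }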
+ if (physical_address_and_heap >= 0xA0000000) { + uint32_t heap_address = physical_address_and_heap & ~0x1FFFFFFF; + uint64_t heap_bit; + if (heap_address >= 0xE0000000) { + heap_bit = 1 << 2; + } else if (heap_address >= 0xC0000000) { + heap_bit = 1 << 1; + } else { + heap_bit = 1 << 0; + } + uint32_t page_first = heap_relative_address >> system_page_size_log2_; + uint32_t page_last = + (heap_relative_address + length - 1) >> system_page_size_log2_; + uint32_t range_start = UINT32_MAX; + for (uint32_t i = page_first; i <= page_last; ++i) { + uint64_t page_heap_bit = heap_bit << ((i & 15) * 4); + if (physical_write_watched_pages_[i >> 4] & page_heap_bit) { + if (range_start == UINT32_MAX) { + range_start = i; + } + physical_write_watched_pages_[i >> 4] &= ~page_heap_bit; + } else { + if (range_start != UINT32_MAX) { + for (auto it = physical_write_watches_.begin(); + it != physical_write_watches_.end(); ++it) { + auto entry = *it; + entry->callback(entry->callback_context, range_start, i - 1); + } + if (unprotect) { + memory::Protect(virtual_membase_ + heap_address + + (range_start << system_page_size_log2_), + (i - range_start) << system_page_size_log2_, + xe::memory::PageAccess::kReadWrite, nullptr); + } + range_start = UINT32_MAX; + } + } + } + if (range_start != UINT32_MAX) { + for (auto it = physical_write_watches_.begin(); + it != physical_write_watches_.end(); ++it) { + auto entry = *it; + entry->callback(entry->callback_context, range_start, page_last); + if (unprotect) { + memory::Protect(virtual_membase_ + heap_address + + (range_start << system_page_size_log2_), + (page_last - range_start + 1) + << system_page_size_log2_, + xe::memory::PageAccess::kReadWrite, nullptr); + } + } + } + } + + // Trigger the legacy (per-range) watches. for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; - if ((entry->address <= physical_address && - entry->address + entry->length > physical_address) || - (entry->address >= physical_address && - entry->address < physical_address + length)) { + if ((entry->address <= heap_relative_address && + entry->address + entry->length > heap_relative_address) || + (entry->address >= heap_relative_address && + entry->address < heap_relative_address + length)) { // This watch lies within the range. End it. FireAccessWatch(entry); it = access_watches_.erase(it); @@ -316,17 +415,43 @@ bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) { return false; } -bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) { - auto lock = global_critical_region_.Acquire(); +bool MMIOHandler::CheckAccessWatch(uint32_t physical_address, + uint32_t heap_address) { + bool hit = false; - if (global_physical_watch_callback_ != nullptr) { - if (global_physical_watch_callback_(global_physical_watch_callback_context_, - physical_address)) { - return true; + // Trigger new (per-page) access watches. + if (heap_address >= 0xA0000000) { + uint32_t page_index = physical_address >> system_page_size_log2_; + // Check the watch only for the virtual memory mapping it was triggered in, + // because as guest protection levels may be different for different + // mappings of the physical memory, it's difficult to synchronize protection + // between the mappings. 
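A reduced sketch of the run batching InvalidateRange performs above: consecutive watched pages are reported to the callbacks as a single [first, last] range instead of one call per page (the callable names here are placeholders for the bit tests and callback list in the real code).

    #include <cstdint>
    #include <functional>

    void ForEachWatchedRun(uint32_t page_first, uint32_t page_last,
                           const std::function<bool(uint32_t)>& is_watched,
                           const std::function<void(uint32_t, uint32_t)>& notify) {
      uint32_t range_start = UINT32_MAX;
      for (uint32_t i = page_first; i <= page_last; ++i) {
        if (is_watched(i)) {
          if (range_start == UINT32_MAX) {
            range_start = i;
          }
        } else if (range_start != UINT32_MAX) {
          notify(range_start, i - 1);  // A watched run ended before page i.
          range_start = UINT32_MAX;
        }
      }
      if (range_start != UINT32_MAX) {
        notify(range_start, page_last);  // Run extends to the end of the range.
      }
    }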
+ uint64_t heap_bit; + if (heap_address >= 0xE0000000) { + heap_bit = 1 << 2; + } else if (heap_address >= 0xC0000000) { + heap_bit = 1 << 1; + } else { + heap_bit = 1 << 0; + } + heap_bit <<= (page_index & 15) * 4; + if (physical_write_watched_pages_[page_index >> 4] & heap_bit) { + hit = true; + memory::Protect(virtual_membase_ + heap_address + + (page_index << system_page_size_log2_), + size_t(1) << system_page_size_log2_, + xe::memory::PageAccess::kReadWrite, nullptr); + physical_write_watched_pages_[page_index >> 4] &= ~heap_bit; + for (auto it = physical_write_watches_.begin(); + it != physical_write_watches_.end(); ++it) { + auto entry = *it; + entry->callback(entry->callback_context, page_index, page_index); + } } } - bool hit = false; + // Trigger legacy (per-range) access watches. + auto lock = global_critical_region_.Acquire(); for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; if (entry->address <= physical_address && @@ -539,14 +664,17 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { } if (!range) { auto fault_address = reinterpret_cast(ex->fault_address()); - uint32_t guest_address = 0; + uint32_t guest_address, guest_heap_address; if (fault_address >= virtual_membase_ && fault_address < physical_membase_) { // Faulting on a virtual address. guest_address = static_cast(ex->fault_address()) & 0x1FFFFFFF; + guest_heap_address = + static_cast(ex->fault_address()) & ~0x1FFFFFFF; } else { // Faulting on a physical address. guest_address = static_cast(ex->fault_address()); + guest_heap_address = 0; } // HACK: Recheck if the pages are still protected (race condition - another @@ -564,7 +692,9 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { // Access is not found within any range, so fail and let the caller handle // it (likely by aborting). - return CheckAccessWatch(guest_address); + // TODO(Triang3l): Don't call for the host physical memory view when legacy + // watches are removed. + return CheckAccessWatch(guest_address, guest_heap_address); } auto rip = ex->pc(); diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index b3f5d6972..c8749e807 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -30,7 +30,9 @@ typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context, uint32_t addr, uint32_t value); typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr, uint32_t address); -typedef bool (*GlobalAccessWatchCallback)(void* context_ptr, uint32_t address); +typedef void (*PhysicalWriteWatchCallback)(void* context_ptr, + uint32_t page_first, + uint32_t page_last); struct MMIORange { uint32_t address; @@ -70,22 +72,69 @@ class MMIOHandler { // either written to or read from, depending on the watch type. These fire as // soon as a read/write happens, and only fire once. // These watches may be spuriously fired if memory is accessed nearby. + // TODO(Triang3l): This is legacy currently used only to support the old + // Vulkan graphics layer. Remove and use WatchPhysicalMemoryWrite instead. 
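For clarity, the address split that ExceptionCallback performs above, as a standalone sketch with a made-up faulting address:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical faulting guest virtual address in the 0xC0000000 mapping.
      uint32_t fault_address = 0xC0012345u;
      uint32_t guest_address = fault_address & 0x1FFFFFFFu;        // 0x00012345
      uint32_t guest_heap_address = fault_address & ~0x1FFFFFFFu;  // 0xC0000000
      std::printf("physical %08X in heap %08X\n", guest_address,
                  guest_heap_address);
      return 0;
    }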
   uintptr_t AddPhysicalAccessWatch(uint32_t guest_address, size_t length,
                                    WatchType type, AccessWatchCallback callback,
                                    void* callback_context, void* callback_data);
   void CancelAccessWatch(uintptr_t watch_handle);
-  void SetGlobalPhysicalAccessWatch(GlobalAccessWatchCallback callback,
-                                    void* callback_context);
-  void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
-                             WatchType type, bool protect_host_access);
-  void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length,
-                               bool unprotect_host_access);
+  // Physical memory write watching, allowing subsystems to invalidate cached
+  // data that depends on memory contents.
+  //
+  // Placing a watch simply marks the pages (of the system page size) as
+  // watched; individual watched ranges (or which specific subscribers are
+  // watching specific pages) are not stored. Because of this, callbacks may be
+  // triggered multiple times for a single range, and for any watched page,
+  // every registered callback is triggered. This is a very simple one-shot
+  // method intended primarily for cache invalidation - there may be spurious
+  // firing, for example, if the game only changes the protection level without
+  // writing anything.
+  //
+  // A range of pages can be watched at any time, but pages are only unwatched
+  // when watches are triggered (since multiple subscribers can depend on the
+  // same memory, and one subscriber shouldn't interfere with another).
+  //
+  // Callbacks can be triggered for one page (if the guest just stores words) or
+  // for multiple pages (for file reading, protection level changes).
+  //
+  // Only guest physical memory mappings are watched - the host-only mapping is
+  // not protected, so it can be used to bypass the write protection (for file
+  // reads, for example - in this case, watches are triggered manually).
+  //
+  // Ranges passed to ProtectAndWatchPhysicalMemory must not contain read-only
+  // or inaccessible pages - this must be checked externally! Otherwise the MMIO
+  // handler will make them read-only, but when a read is attempted, it will
+  // make them read-write!
+  //
+  // IMPORTANT NOTE: When a watch is triggered, the watched page is unprotected
+  // ***ONLY IN THE HEAP WHERE THE ADDRESS IS LOCATED***! Since different
+  // virtual memory mappings of physical memory can have different protection
+  // levels for the same pages, and watches must not be placed on read-only or
+  // totally inaccessible pages, there are significant difficulties with
+  // synchronizing all three ranges.
+  //
+  // TODO(Triang3l): Allow the callbacks to unwatch regions larger than one page
+  // (for instance, 64 KB) so there are fewer access violations. All callbacks
+  // must agree to unwatch larger ranges, because in some cases (like regions
+  // near the locations that render targets have been resolved to) it is
+  // necessary to invalidate only a single page and nothing more.
+  void* RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback,
+                                   void* callback_context);
+  void UnregisterPhysicalWriteWatch(void* watch_handle);
+  // Force-protects the range in ***ONE SPECIFIC HEAP***, either 0xA0000000,
+  // 0xC0000000 or 0xE0000000, depending on the higher bits of the address.
+  void ProtectAndWatchPhysicalMemory(uint32_t physical_address_and_heap,
+                                     uint32_t length);
 
-  // Fires and clears any access watches that overlap this range.
-  void InvalidateRange(uint32_t physical_address, size_t length);
+  // Fires and clears any write watches that overlap this range in one heap.
+ // Unprotecting can be inhibited if this is called right before applying + // different protection to the same range. + void InvalidateRange(uint32_t physical_address_and_heap, uint32_t length, + bool unprotect = true); // Returns true if /all/ of this range is watched. + // TODO(Triang3l): Remove when legacy watches are removed. bool IsRangeWatched(uint32_t physical_address, size_t length); protected: @@ -98,18 +147,22 @@ class MMIOHandler { void* callback_data; }; + struct PhysicalWriteWatchEntry { + PhysicalWriteWatchCallback callback; + void* callback_context; + }; + MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase, - uint8_t* membase_end) - : virtual_membase_(virtual_membase), - physical_membase_(physical_membase), - memory_end_(membase_end) {} + uint8_t* membase_end); static bool ExceptionCallbackThunk(Exception* ex, void* data); bool ExceptionCallback(Exception* ex); void FireAccessWatch(AccessWatchEntry* entry); void ClearAccessWatch(AccessWatchEntry* entry); - bool CheckAccessWatch(uint32_t guest_address); + bool CheckAccessWatch(uint32_t guest_address, uint32_t guest_heap_address); + + uint32_t system_page_size_log2_; uint8_t* virtual_membase_; uint8_t* physical_membase_; @@ -120,8 +173,13 @@ class MMIOHandler { xe::global_critical_region global_critical_region_; // TODO(benvanik): data structure magic. std::list access_watches_; - GlobalAccessWatchCallback global_physical_watch_callback_ = nullptr; - void* global_physical_watch_callback_context_; + std::vector physical_write_watches_; + // For each page, there are 4 bits (16 pages in each word): + // 0 - whether the page is protected in A0000000. + // 1 - whether the page is protected in C0000000. + // 2 - whether the page is protected in E0000000. + // 3 - unused, always zero. + std::vector physical_write_watched_pages_; static MMIOHandler* global_handler_; }; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 10e719e80..6c2b63063 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -33,7 +33,6 @@ SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor, assert_true(page_bitmap_length != 0); valid_pages_.resize(page_bitmap_length); - protected_pages_.resize(page_bitmap_length); } SharedMemory::~SharedMemory() { Shutdown(); } @@ -69,19 +68,20 @@ bool SharedMemory::Initialize() { std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t)); - std::memset(protected_pages_.data(), 0, - protected_pages_.size() * sizeof(uint64_t)); - upload_buffer_pool_ = std::make_unique(context, 4 * 1024 * 1024); - memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this); + physical_write_watch_handle_ = + memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this); return true; } void SharedMemory::Shutdown() { - memory_->SetGlobalPhysicalAccessWatch(nullptr, nullptr); + if (physical_write_watch_handle_ != nullptr) { + memory_->UnregisterPhysicalWriteWatch(physical_write_watch_handle_); + physical_write_watch_handle_ = nullptr; + } upload_buffer_pool_.reset(); @@ -294,6 +294,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { return false; } uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_; + // No mutex holding here! 
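To make the new interface documented above concrete, a minimal usage sketch from a hypothetical subscriber (ExampleCache and its members are illustrative, not part of the patch); it follows the same register/watch/unregister flow SharedMemory uses:

    #include <cstdint>

    #include "xenia/memory.h"

    class ExampleCache {
     public:
      void Initialize(xe::Memory* memory) {
        memory_ = memory;
        // Register once; the handle is kept for unregistration.
        watch_handle_ =
            memory_->RegisterPhysicalWriteWatch(WriteCallbackThunk, this);
      }
      void Shutdown() {
        if (watch_handle_ != nullptr) {
          memory_->UnregisterPhysicalWriteWatch(watch_handle_);
          watch_handle_ = nullptr;
        }
      }
      void OnRangeCached(uint32_t physical_address, uint32_t length) {
        // Ask to be notified about the next CPU write to this range.
        memory_->WatchPhysicalMemoryWrite(physical_address, length);
      }

     private:
      static void WriteCallbackThunk(void* context_ptr, uint32_t page_first,
                                     uint32_t page_last) {
        // Invalidate anything cached for [page_first, page_last] here.
      }

      xe::Memory* memory_ = nullptr;
      void* watch_handle_ = nullptr;
    };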
MakeRangeValid(upload_range_start, upload_buffer_pages); std::memcpy( upload_buffer_mapping, @@ -310,22 +311,12 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { return true; } -void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { - start &= kAddressMask; - if (length == 0 || start >= kBufferSize) { - return; - } - length = std::min(length, kBufferSize - start); - uint32_t end = start + length - 1; - uint32_t page_first = start >> page_size_log2_; - uint32_t page_last = end >> page_size_log2_; - uint32_t bucket_first = start >> kWatchBucketSizeLog2; - uint32_t bucket_last = end >> kWatchBucketSizeLog2; +void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last) { + uint32_t bucket_first = page_first << page_size_log2_ >> kWatchBucketSizeLog2; + uint32_t bucket_last = page_last << page_size_log2_ >> kWatchBucketSizeLog2; std::lock_guard lock(validity_mutex_); - // Trigger modification callbacks so, for instance, resolved data is loaded to - // the texture. for (uint32_t i = bucket_first; i <= bucket_last; ++i) { WatchNode* node = watch_buckets_[i]; while (node != nullptr) { @@ -340,9 +331,25 @@ void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { } } } +} + +void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { + start &= kAddressMask; + if (length == 0 || start >= kBufferSize) { + return; + } + length = std::min(length, kBufferSize - start); + uint32_t end = start + length - 1; + uint32_t page_first = start >> page_size_log2_; + uint32_t page_last = end >> page_size_log2_; + + // Trigger modification callbacks so, for instance, resolved data is loaded to + // the texture. + FireWatches(page_first, page_last); // Mark the range as valid (so pages are not reuploaded until modified by the - // CPU) and protect it so the CPU can reuse it. + // CPU) and watch it so the CPU can reuse it and this will be caught. + // No mutex holding here! 
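A small sketch of the index math used by RangeWrittenByGPU and FireWatches above; the page and bucket sizes here are assumptions (the real values come from page_size_log2_ and kWatchBucketSizeLog2):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t page_size_log2 = 12;          // 4 KiB pages (assumed).
      const uint32_t watch_bucket_size_log2 = 16;  // 64 KiB buckets (assumed).
      uint32_t start = 0x00012340u, length = 0x2000u;  // Hypothetical GPU write.
      uint32_t end = start + length - 1;
      uint32_t page_first = start >> page_size_log2;
      uint32_t page_last = end >> page_size_log2;
      uint32_t bucket_first =
          (page_first << page_size_log2) >> watch_bucket_size_log2;
      uint32_t bucket_last =
          (page_last << page_size_log2) >> watch_bucket_size_log2;
      // Prints "pages 18-20, buckets 1-1" for this example.
      std::printf("pages %u-%u, buckets %u-%u\n", page_first, page_last,
                  bucket_first, bucket_last);
      return 0;
    }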
MakeRangeValid(page_first, page_last - page_first + 1); } @@ -356,23 +363,23 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first, uint32_t valid_block_first = valid_page_first >> 6; uint32_t valid_block_last = valid_page_last >> 6; - std::lock_guard lock(validity_mutex_); + { + std::lock_guard lock(validity_mutex_); - for (uint32_t i = valid_block_first; i <= valid_block_last; ++i) { - uint64_t valid_bits = UINT64_MAX; - if (i == valid_block_first) { - valid_bits &= ~((1ull << (valid_page_first & 63)) - 1); + for (uint32_t i = valid_block_first; i <= valid_block_last; ++i) { + uint64_t valid_bits = UINT64_MAX; + if (i == valid_block_first) { + valid_bits &= ~((1ull << (valid_page_first & 63)) - 1); + } + if (i == valid_block_last && (valid_page_last & 63) != 63) { + valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1; + } + valid_pages_[i] |= valid_bits; } - if (i == valid_block_last && (valid_page_last & 63) != 63) { - valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1; - } - valid_pages_[i] |= valid_bits; - protected_pages_[i] |= valid_bits; } - memory_->ProtectPhysicalMemory( - valid_page_first << page_size_log2_, valid_page_count << page_size_log2_, - cpu::MMIOHandler::WatchType::kWatchWrite, false); + memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_, + valid_page_count << page_size_log2_); } void SharedMemory::UnlinkWatchRange(WatchRange* range) { @@ -454,44 +461,32 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first, } } -bool SharedMemory::MemoryWriteCallbackThunk(void* context_ptr, - uint32_t address) { - SharedMemory* shared_memory = reinterpret_cast(context_ptr); - return shared_memory->MemoryWriteCallback(address); +void SharedMemory::MemoryWriteCallbackThunk(void* context_ptr, + uint32_t page_first, + uint32_t page_last) { + reinterpret_cast(context_ptr) + ->MemoryWriteCallback(page_first, page_last); } -bool SharedMemory::MemoryWriteCallback(uint32_t address) { - uint32_t page_index = (address & kAddressMask) >> page_size_log2_; - uint32_t block_index = page_index >> 6; - uint64_t page_bit = 1ull << (page_index & 63); +void SharedMemory::MemoryWriteCallback(uint32_t page_first, + uint32_t page_last) { + uint32_t block_first = page_first >> 6; + uint32_t block_last = page_last >> 6; std::lock_guard lock(validity_mutex_); - if (!(protected_pages_[block_index] & page_bit)) { - return false; - } - - valid_pages_[block_index] &= ~page_bit; - - // Trigger watch callbacks. - WatchNode* node = - watch_buckets_[page_index << page_size_log2_ >> kWatchBucketSizeLog2]; - while (node != nullptr) { - WatchRange* range = node->range; - // Store the next node now since when the callback is triggered, the links - // will be broken. 
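The removed per-address callback above relies on a pattern that FireWatches keeps using: when a callback may unlink the node being visited, the next pointer has to be read before invoking it. A simplified sketch with stand-in types (not the real WatchNode/WatchRange):

    struct ExampleNode {
      ExampleNode* bucket_node_next;
      void (*callback)(ExampleNode* node);  // May unlink/free the node.
    };

    void FireAll(ExampleNode* bucket_head) {
      ExampleNode* node = bucket_head;
      while (node != nullptr) {
        // Store the next node now - the callback may break the links.
        ExampleNode* next = node->bucket_node_next;
        node->callback(node);
        node = next;
      }
    }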
- node = node->bucket_node_next; - if (page_index >= range->page_first && page_index <= range->page_last) { - range->callback(range->callback_context, range->callback_data, - range->callback_argument); - UnlinkWatchRange(range); + for (uint32_t i = block_first; i <= block_last; ++i) { + uint64_t invalidate_bits = UINT64_MAX; + if (i == block_first) { + invalidate_bits &= ~((1ull << (page_first & 63)) - 1); } + if (i == block_last && (page_last & 63) != 63) { + invalidate_bits &= (1ull << ((page_last & 63) + 1)) - 1; + } + valid_pages_[i] &= ~invalidate_bits; } - memory_->UnprotectPhysicalMemory(page_index << page_size_log2_, - 1 << page_size_log2_, false); - protected_pages_[block_index] &= ~page_bit; - return true; + FireWatches(page_first, page_last); } void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state) { diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index 0d126be9e..accd1bb2d 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -93,6 +93,10 @@ class SharedMemory { void CreateRawUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle); private: + // Mark the memory range as updated and protect it. The validity mutex must + // NOT be held when calling!!! + void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count); + D3D12CommandProcessor* command_processor_; Memory* memory_; @@ -121,6 +125,9 @@ class SharedMemory { // Total physical page count. uint32_t page_count_; + // Handle of the physical memory write callback. + void* physical_write_watch_handle_ = nullptr; + // Mutex between the exception handler and the command processor, to be locked // when checking or updating validity of pages/ranges. std::recursive_mutex validity_mutex_; @@ -131,14 +138,11 @@ class SharedMemory { // Bit vector containing whether physical memory system pages are up to date. std::vector valid_pages_; - // Mark the memory range as updated and protect it. - void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count); - // Whether each physical page is protected by the GPU code (after uploading). - std::vector protected_pages_; // Memory access callback. - static bool MemoryWriteCallbackThunk(void* context_ptr, uint32_t address); - bool MemoryWriteCallback(uint32_t address); + static void MemoryWriteCallbackThunk(void* context_ptr, uint32_t page_first, + uint32_t page_last); + void MemoryWriteCallback(uint32_t page_first, uint32_t page_last); struct WatchNode; // Watched range placed by other GPU subsystems. @@ -187,6 +191,8 @@ class SharedMemory { uint32_t watch_node_current_pool_allocated_ = 0; WatchRange* watch_range_first_free_ = nullptr; WatchNode* watch_node_first_free_ = nullptr; + // Triggers the watches, removing them when triggered. + void FireWatches(uint32_t page_first, uint32_t page_last); // Unlinks and frees the range and its nodes. Call this with the mutex locked. 
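MakeRangeValid and MemoryWriteCallback above both use the same 64-pages-per-word masking, one to set validity bits and one to clear them. A standalone sketch of the set case (use `&= ~mask` for the clearing/invalidation case):

    #include <cstdint>
    #include <vector>

    void SetPageRange(std::vector<uint64_t>& valid_pages, uint32_t page_first,
                      uint32_t page_last) {
      uint32_t block_first = page_first >> 6;
      uint32_t block_last = page_last >> 6;
      for (uint32_t i = block_first; i <= block_last; ++i) {
        uint64_t mask = UINT64_MAX;
        if (i == block_first) {
          mask &= ~((1ull << (page_first & 63)) - 1);  // Drop bits below first.
        }
        if (i == block_last && (page_last & 63) != 63) {
          mask &= (1ull << ((page_last & 63) + 1)) - 1;  // Drop bits above last.
        }
        valid_pages[i] |= mask;
      }
    }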
void UnlinkWatchRange(WatchRange* range); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc index c3bbb5e7e..4d8bd06b6 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc @@ -169,9 +169,8 @@ dword_result_t NtReadFile(dword_t file_handle, dword_t event_handle, // some games NtReadFile() directly into texture memory // TODO(rick): better checking of physical address if (buffer.guest_address() >= 0xA0000000) { - auto heap = kernel_memory()->LookupHeap(buffer.guest_address()); cpu::MMIOHandler::global_handler()->InvalidateRange( - heap->GetPhysicalAddress(buffer.guest_address()), buffer_length); + buffer.guest_address(), buffer_length); } // Synchronous. diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 084358218..2706fdd94 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -412,22 +412,21 @@ void Memory::CancelAccessWatch(uintptr_t watch_handle) { mmio_handler_->CancelAccessWatch(watch_handle); } -void Memory::SetGlobalPhysicalAccessWatch( - cpu::GlobalAccessWatchCallback callback, void* callback_context) { - mmio_handler_->SetGlobalPhysicalAccessWatch(callback, callback_context); +void* Memory::RegisterPhysicalWriteWatch( + cpu::PhysicalWriteWatchCallback callback, void* callback_context) { + return mmio_handler_->RegisterPhysicalWriteWatch(callback, callback_context); } -void Memory::ProtectPhysicalMemory(uint32_t physical_address, uint32_t length, - cpu::MMIOHandler::WatchType type, - bool protect_host_access) { - mmio_handler_->ProtectPhysicalMemory(physical_address, length, type, - protect_host_access); +void Memory::UnregisterPhysicalWriteWatch(void* watch_handle) { + mmio_handler_->UnregisterPhysicalWriteWatch(watch_handle); } -void Memory::UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length, - bool unprotect_host_access) { - mmio_handler_->UnprotectPhysicalMemory(physical_address, length, - unprotect_host_access); +void Memory::WatchPhysicalMemoryWrite(uint32_t physical_address, + uint32_t length) { + // Watch independently in all three mappings. 
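The WatchWrite calls that follow watch the range independently in each of the three guest views of physical memory; a trivial sketch of that address relationship, with a made-up physical address:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t physical_address = 0x00012345u;  // Hypothetical.
      const uint32_t heap_bases[] = {0xA0000000u, 0xC0000000u, 0xE0000000u};
      for (uint32_t heap_base : heap_bases) {
        // The same physical page is reachable through all three mappings.
        std::printf("visible at %08X\n", heap_base + physical_address);
      }
      return 0;
    }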
+ heaps_.vA0000000.WatchWrite(physical_address, length, mmio_handler_.get()); + heaps_.vC0000000.WatchWrite(physical_address, length, mmio_handler_.get()); + heaps_.vE0000000.WatchWrite(physical_address, length, mmio_handler_.get()); } uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, @@ -1363,8 +1362,8 @@ bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) { uint32_t parent_base_address = GetPhysicalAddress(base_address); uint32_t region_size = 0; if (QuerySize(base_address, ®ion_size)) { - cpu::MMIOHandler::global_handler()->InvalidateRange(parent_base_address, - region_size); + cpu::MMIOHandler::global_handler()->InvalidateRange( + base_address, region_size, !FLAGS_protect_on_release); } if (!parent_heap_->Release(parent_base_address, out_region_size)) { @@ -1378,10 +1377,11 @@ bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) { bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect) { auto global_lock = global_critical_region_.Acquire(); - uint32_t parent_address = GetPhysicalAddress(address); - cpu::MMIOHandler::global_handler()->InvalidateRange(parent_address, size); - if (!parent_heap_->Protect(parent_address, size, protect, old_protect)) { + cpu::MMIOHandler::global_handler()->InvalidateRange(address, size, false); + + if (!parent_heap_->Protect(GetPhysicalAddress(address), size, protect, + old_protect)) { XELOGE("PhysicalHeap::Protect failed due to parent heap failure"); return false; } @@ -1389,4 +1389,47 @@ bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, return BaseHeap::Protect(address, size, protect); } +void PhysicalHeap::WatchWrite(uint32_t address, uint32_t size, + cpu::MMIOHandler* mmio_handler) { + address &= 0x1FFFFFFF; + if (address >= heap_size_) { + // E0000000 is not exactly 512 MB long. + return; + } + size = std::min(size, heap_size_ - address); + if (size == 0) { + return; + } + + uint32_t system_page_size = uint32_t(xe::memory::page_size()); + uint32_t system_page_first = address / system_page_size; + uint32_t system_page_last = (address + size - 1) / system_page_size; + + auto global_lock = global_critical_region_.Acquire(); + + // Watch all writable pages of the system page size within the requested + // range. + uint32_t range_start = UINT32_MAX; + for (uint32_t i = system_page_first; i <= system_page_last; ++i) { + if (page_table_[i * system_page_size / page_size_].current_protect & + kMemoryProtectWrite) { + if (range_start == UINT32_MAX) { + range_start = i; + } + } else { + if (range_start != UINT32_MAX) { + mmio_handler->ProtectAndWatchPhysicalMemory( + heap_base_ + range_start * system_page_size, + (i - range_start) * system_page_size); + range_start = UINT32_MAX; + } + } + } + if (range_start != UINT32_MAX) { + mmio_handler->ProtectAndWatchPhysicalMemory( + heap_base_ + range_start * system_page_size, + (system_page_last - range_start + 1) * system_page_size); + } +} + } // namespace xe diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 2229c951a..94059b15c 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -215,6 +215,9 @@ class PhysicalHeap : public BaseHeap { bool Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect = nullptr) override; + void WatchWrite(uint32_t address, uint32_t size, + cpu::MMIOHandler* mmio_handler); + protected: VirtualHeap* parent_heap_; }; @@ -319,23 +322,26 @@ class Memory { // Cancels a write watch requested with AddPhysicalAccessWatch. 
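PhysicalHeap::WatchWrite above checks protection per guest heap page while watching per system page, so it converts between the two indices. A sketch of that conversion with assumed sizes (the real ones come from xe::memory::page_size() and the heap's page_size_):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t system_page_size = 4096;  // Host page size (assumed).
      const uint32_t heap_page_size = 65536;   // Guest heap page size (assumed).
      uint32_t system_page_index = 37;         // Hypothetical system page.
      uint32_t heap_page_index =
          system_page_index * system_page_size / heap_page_size;
      // Prints "system page 37 -> heap page 2".
      std::printf("system page %u -> heap page %u\n", system_page_index,
                  heap_page_index);
      return 0;
    }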
   void CancelAccessWatch(uintptr_t watch_handle);
 
-  // Sets the default access watch callback for physical memory, which has a
-  // higher priority than watches - if it returns true, watches won't be
-  // triggered.
-  void SetGlobalPhysicalAccessWatch(cpu::GlobalAccessWatchCallback callback,
-                                    void* callback_context);
+  // Registers a global callback for physical memory writes. See
+  // cpu/mmio_handler.h for more details about physical memory write watches.
+  void* RegisterPhysicalWriteWatch(cpu::PhysicalWriteWatchCallback callback,
+                                   void* callback_context);
 
-  // Protects a physical memory range without adding a watch, primarily for use
-  // with the global physical access watch.
-  void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
-                             cpu::MMIOHandler::WatchType type,
-                             bool protect_host_access);
+  // Unregisters a physical memory write watch previously added with
+  // RegisterPhysicalWriteWatch.
+  void UnregisterPhysicalWriteWatch(void* watch_handle);
 
-  // Unprotects a physical memory range previously protected using
-  // ProtectPhysicalMemory, primarily for use with the global physical access
-  // watch.
-  void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length,
-                               bool unprotect_host_access);
+  // Enables watching of the specified memory range, snapped to system page
+  // boundaries. When something is written to a watched range (or when its
+  // protection changes), the registered watch callbacks are triggered for the
+  // page (or pages, for file reads and protection changes) that has been
+  // written to. This protects physical memory only under virtual_membase_, so
+  // physical_membase_ can still be used to bypass the protection placed by the
+  // watches (for file reads, for example). Read-only and inaccessible pages
+  // are silently ignored; only attempts to write to read-write pages will
+  // trigger watch callbacks.
+  // AVOID CALLING WITH MUTEXES LOCKED!!!
+  void WatchPhysicalMemoryWrite(uint32_t physical_address, uint32_t length);
 
   // Allocates virtual memory from the 'system' heap.
   // System memory is kept separate from game memory but is still accessible