From 4aceeb73c436b40b06614ba3f52a7b39efa17854 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 30 Jul 2019 08:00:20 +0300 Subject: [PATCH] [Memory] Move new watches to heap-aware Memory from MMIOHandler --- src/xenia/base/exception_handler.h | 17 +- src/xenia/base/exception_handler_win.cc | 22 +- src/xenia/base/memory.h | 2 +- src/xenia/cpu/mmio_handler.cc | 216 ++---------- src/xenia/cpu/mmio_handler.h | 88 +---- src/xenia/gpu/d3d12/primitive_converter.cc | 15 +- src/xenia/gpu/d3d12/primitive_converter.h | 7 +- src/xenia/gpu/d3d12/shared_memory.cc | 31 +- src/xenia/gpu/d3d12/shared_memory.h | 24 +- src/xenia/gpu/d3d12/texture_cache.cc | 44 +-- src/xenia/memory.cc | 367 ++++++++++++++++++--- src/xenia/memory.h | 89 ++++- 12 files changed, 535 insertions(+), 387 deletions(-) diff --git a/src/xenia/base/exception_handler.h b/src/xenia/base/exception_handler.h index dba2cfb42..cff15ab1b 100644 --- a/src/xenia/base/exception_handler.h +++ b/src/xenia/base/exception_handler.h @@ -26,11 +26,19 @@ class Exception { kIllegalInstruction, }; + enum class AccessViolationOperation { + kUnknown, + kRead, + kWrite, + }; + void InitializeAccessViolation(X64Context* thread_context, - uint64_t fault_address) { + uint64_t fault_address, + AccessViolationOperation operation) { code_ = Code::kAccessViolation; thread_context_ = thread_context; fault_address_ = fault_address; + access_violation_operation_ = operation; } void InitializeIllegalInstruction(X64Context* thread_context) { code_ = Code::kIllegalInstruction; @@ -62,10 +70,17 @@ class Exception { // In case of AV, address that was read from/written to. uint64_t fault_address() const { return fault_address_; } + // In case of AV, what kind of operation caused it. + AccessViolationOperation access_violation_operation() const { + return access_violation_operation_; + } + private: Code code_ = Code::kInvalidException; X64Context* thread_context_ = nullptr; uint64_t fault_address_ = 0; + AccessViolationOperation access_violation_operation_ = + AccessViolationOperation::kUnknown; }; class ExceptionHandler { diff --git a/src/xenia/base/exception_handler_win.cc b/src/xenia/base/exception_handler_win.cc index 3fde109d6..6f2ae3216 100644 --- a/src/xenia/base/exception_handler_win.cc +++ b/src/xenia/base/exception_handler_win.cc @@ -51,10 +51,26 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) { case STATUS_ILLEGAL_INSTRUCTION: ex.InitializeIllegalInstruction(&thread_context); break; - case STATUS_ACCESS_VIOLATION: + case STATUS_ACCESS_VIOLATION: { + Exception::AccessViolationOperation access_violation_operation; + switch (ex_info->ExceptionRecord->ExceptionInformation[0]) { + case 0: + access_violation_operation = + Exception::AccessViolationOperation::kRead; + break; + case 1: + access_violation_operation = + Exception::AccessViolationOperation::kWrite; + break; + default: + access_violation_operation = + Exception::AccessViolationOperation::kUnknown; + break; + } ex.InitializeAccessViolation( - &thread_context, ex_info->ExceptionRecord->ExceptionInformation[1]); - break; + &thread_context, ex_info->ExceptionRecord->ExceptionInformation[1], + access_violation_operation); + } break; default: // Unknown/unhandled type. 
return EXCEPTION_CONTINUE_SEARCH; diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 409d3a0a6..3819f9dbe 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -62,7 +62,7 @@ bool DeallocFixed(void* base_address, size_t length, // Sets the access rights for the given block of memory and returns the previous // access rights. Both base_address and length will be adjusted to page_size(). bool Protect(void* base_address, size_t length, PageAccess access, - PageAccess* out_old_access); + PageAccess* out_old_access = nullptr); // Queries a region of pages to get the access rights. This will modify the // length parameter to the length of pages with the same consecutive access diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index e6cbb24ca..e104c887f 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -24,17 +24,19 @@ namespace cpu { MMIOHandler* MMIOHandler::global_handler_ = nullptr; -std::unique_ptr MMIOHandler::Install(uint8_t* virtual_membase, - uint8_t* physical_membase, - uint8_t* membase_end) { +std::unique_ptr MMIOHandler::Install( + uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end, + AccessViolationCallback access_violation_callback, + void* access_violation_callback_context) { // There can be only one handler at a time. assert_null(global_handler_); if (global_handler_) { return nullptr; } - auto handler = std::unique_ptr( - new MMIOHandler(virtual_membase, physical_membase, membase_end)); + auto handler = std::unique_ptr(new MMIOHandler( + virtual_membase, physical_membase, membase_end, access_violation_callback, + access_violation_callback_context)); // Install the exception handler directed at the MMIOHandler. ExceptionHandler::Install(ExceptionCallbackThunk, handler.get()); @@ -44,18 +46,14 @@ std::unique_ptr MMIOHandler::Install(uint8_t* virtual_membase, } MMIOHandler::MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase, - uint8_t* membase_end) + uint8_t* membase_end, + AccessViolationCallback access_violation_callback, + void* access_violation_callback_context) : virtual_membase_(virtual_membase), physical_membase_(physical_membase), - memory_end_(membase_end) { - system_page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size())); - - uint32_t physical_page_count = (512 * 1024 * 1024) >> system_page_size_log2_; - physical_write_watched_pages_.resize(physical_page_count >> 4); - assert_true(physical_write_watched_pages_.size() != 0); - std::memset(physical_write_watched_pages_.data(), 0, - physical_write_watched_pages_.size() * sizeof(uint64_t)); -} + memory_end_(membase_end), + access_violation_callback_(access_violation_callback), + access_violation_callback_context_(access_violation_callback_context) {} MMIOHandler::~MMIOHandler() { ExceptionHandler::Uninstall(ExceptionCallbackThunk, this); @@ -231,86 +229,8 @@ void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) { delete entry; } -void* MMIOHandler::RegisterPhysicalWriteWatch( - PhysicalWriteWatchCallback callback, void* callback_context) { - PhysicalWriteWatchEntry* entry = new PhysicalWriteWatchEntry; - entry->callback = callback; - entry->callback_context = callback_context; - - auto lock = global_critical_region_.Acquire(); - physical_write_watches_.push_back(entry); - - return entry; -} - -void MMIOHandler::UnregisterPhysicalWriteWatch(void* watch_handle) { - auto entry = reinterpret_cast(watch_handle); - - { - auto lock = global_critical_region_.Acquire(); - auto it = 
std::find(physical_write_watches_.begin(), - physical_write_watches_.end(), entry); - assert_false(it == physical_write_watches_.end()); - if (it != physical_write_watches_.end()) { - physical_write_watches_.erase(it); - } - } - - delete entry; -} - -void MMIOHandler::ProtectAndWatchPhysicalMemory( - uint32_t physical_address_and_heap, uint32_t length) { - // Bits to set in 16-bit blocks to mark that the pages are protected. - uint64_t block_heap_mask; - if (physical_address_and_heap >= 0xE0000000) { - block_heap_mask = 0x4444444444444444ull; - } else if (physical_address_and_heap >= 0xC0000000) { - block_heap_mask = 0x2222222222222222ull; - } else if (physical_address_and_heap >= 0xA0000000) { - block_heap_mask = 0x1111111111111111ull; - } else { - assert_always(); - return; - } - - uint32_t heap_relative_address = physical_address_and_heap & 0x1FFFFFFF; - length = std::min(length, 0x20000000u - heap_relative_address); - if (length == 0) { - return; - } - - uint32_t page_first = heap_relative_address >> system_page_size_log2_; - uint32_t page_last = - (heap_relative_address + length - 1) >> system_page_size_log2_; - uint32_t block_first = page_first >> 4; - uint32_t block_last = page_last >> 4; - - auto lock = global_critical_region_.Acquire(); - - // Set the bits indicating that the pages are watched and access violations - // there are intentional. - for (uint32_t i = block_first; i <= block_last; ++i) { - uint64_t block_set_bits = block_heap_mask; - if (i == block_first) { - block_set_bits &= ~((1ull << ((page_first & 15) * 4)) - 1); - } - if (i == block_last && (page_last & 15) != 15) { - block_set_bits &= (1ull << (((page_last & 15) + 1) * 4)) - 1; - } - physical_write_watched_pages_[i] |= block_set_bits; - } - - // Protect only in one range (due to difficulties synchronizing protection - // levels between those ranges). - memory::Protect(virtual_membase_ + (physical_address_and_heap & ~0x1FFFFFFF) + - (page_first << system_page_size_log2_), - (page_last - page_first + 1) << system_page_size_log2_, - memory::PageAccess::kReadOnly, nullptr); -} - void MMIOHandler::InvalidateRange(uint32_t physical_address_and_heap, - uint32_t length, bool unprotect) { + uint32_t length) { uint32_t heap_relative_address = physical_address_and_heap & 0x1FFFFFFF; length = std::min(length, 0x20000000u - heap_relative_address); if (length == 0) { @@ -319,61 +239,6 @@ void MMIOHandler::InvalidateRange(uint32_t physical_address_and_heap, auto lock = global_critical_region_.Acquire(); - // Trigger the new (per-page) watches and unwatch the pages. 
- if (physical_address_and_heap >= 0xA0000000) { - uint32_t heap_address = physical_address_and_heap & ~0x1FFFFFFF; - uint64_t heap_bit; - if (heap_address >= 0xE0000000) { - heap_bit = 1 << 2; - } else if (heap_address >= 0xC0000000) { - heap_bit = 1 << 1; - } else { - heap_bit = 1 << 0; - } - uint32_t page_first = heap_relative_address >> system_page_size_log2_; - uint32_t page_last = - (heap_relative_address + length - 1) >> system_page_size_log2_; - uint32_t range_start = UINT32_MAX; - for (uint32_t i = page_first; i <= page_last; ++i) { - uint64_t page_heap_bit = heap_bit << ((i & 15) * 4); - if (physical_write_watched_pages_[i >> 4] & page_heap_bit) { - if (range_start == UINT32_MAX) { - range_start = i; - } - physical_write_watched_pages_[i >> 4] &= ~page_heap_bit; - } else { - if (range_start != UINT32_MAX) { - for (auto it = physical_write_watches_.begin(); - it != physical_write_watches_.end(); ++it) { - auto entry = *it; - entry->callback(entry->callback_context, range_start, i - 1); - } - if (unprotect) { - memory::Protect(virtual_membase_ + heap_address + - (range_start << system_page_size_log2_), - (i - range_start) << system_page_size_log2_, - xe::memory::PageAccess::kReadWrite, nullptr); - } - range_start = UINT32_MAX; - } - } - } - if (range_start != UINT32_MAX) { - for (auto it = physical_write_watches_.begin(); - it != physical_write_watches_.end(); ++it) { - auto entry = *it; - entry->callback(entry->callback_context, range_start, page_last); - if (unprotect) { - memory::Protect(virtual_membase_ + heap_address + - (range_start << system_page_size_log2_), - (page_last - range_start + 1) - << system_page_size_log2_, - xe::memory::PageAccess::kReadWrite, nullptr); - } - } - } - } - // Trigger the legacy (per-range) watches. for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; @@ -421,38 +286,8 @@ bool MMIOHandler::CheckAccessWatch(uint32_t physical_address, uint32_t heap_address) { bool hit = false; - // Trigger new (per-page) access watches. - if (heap_address >= 0xA0000000) { - uint32_t page_index = physical_address >> system_page_size_log2_; - // Check the watch only for the virtual memory mapping it was triggered in, - // because as guest protection levels may be different for different - // mappings of the physical memory, it's difficult to synchronize protection - // between the mappings. - uint64_t heap_bit; - if (heap_address >= 0xE0000000) { - heap_bit = 1 << 2; - } else if (heap_address >= 0xC0000000) { - heap_bit = 1 << 1; - } else { - heap_bit = 1 << 0; - } - heap_bit <<= (page_index & 15) * 4; - if (physical_write_watched_pages_[page_index >> 4] & heap_bit) { - hit = true; - memory::Protect(virtual_membase_ + heap_address + - (page_index << system_page_size_log2_), - size_t(1) << system_page_size_log2_, - xe::memory::PageAccess::kReadWrite, nullptr); - physical_write_watched_pages_[page_index >> 4] &= ~heap_bit; - for (auto it = physical_write_watches_.begin(); - it != physical_write_watches_.end(); ++it) { - auto entry = *it; - entry->callback(entry->callback_context, page_index, page_index); - } - } - } - // Trigger legacy (per-range) access watches. + // TODO(Triang3l): Remove when legacy watches are deleted. auto lock = global_critical_region_.Acquire(); for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; @@ -694,9 +529,24 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { // Access is not found within any range, so fail and let the caller handle // it (likely by aborting). 
- // TODO(Triang3l): Don't call for the host physical memory view when legacy - // watches are removed. - return CheckAccessWatch(guest_address, guest_heap_address); + // TODO(Triang3l): Remove legacy CheckAccessWatch, only call the callback. + bool hit = CheckAccessWatch(guest_address, guest_heap_address); + if (access_violation_callback_) { + switch (ex->access_violation_operation()) { + case Exception::AccessViolationOperation::kRead: + hit |= access_violation_callback_(access_violation_callback_context_, + size_t(ex->fault_address()), false); + break; + case Exception::AccessViolationOperation::kWrite: + hit |= access_violation_callback_(access_violation_callback_context_, + size_t(ex->fault_address()), true); + break; + default: + // Data Execution Prevention or something else uninteresting. + break; + } + } + return hit; } auto rip = ex->pc(); diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index c8749e807..92511052a 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -30,9 +30,6 @@ typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context, uint32_t addr, uint32_t value); typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr, uint32_t address); -typedef void (*PhysicalWriteWatchCallback)(void* context_ptr, - uint32_t page_first, - uint32_t page_last); struct MMIORange { uint32_t address; @@ -48,15 +45,22 @@ class MMIOHandler { public: virtual ~MMIOHandler(); + typedef bool (*AccessViolationCallback)(void* context, size_t host_address, + bool is_write); + enum WatchType { kWatchInvalid = 0, kWatchWrite = 1, kWatchReadWrite = 2, }; - static std::unique_ptr Install(uint8_t* virtual_membase, - uint8_t* physical_membase, - uint8_t* membase_end); + // access_violation_callback is called in global_critical_region, so if + // multiple threads trigger an access violation in the same page, the callback + // will be called only once. + static std::unique_ptr Install( + uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end, + AccessViolationCallback access_violation_callback, + void* access_violation_callback_context); static MMIOHandler* global_handler() { return global_handler_; } bool RegisterRange(uint32_t virtual_address, uint32_t mask, uint32_t size, @@ -79,59 +83,10 @@ class MMIOHandler { void* callback_context, void* callback_data); void CancelAccessWatch(uintptr_t watch_handle); - // Physical memory write watching, allowing subsystems to invalidate cached - // data that depends on memory contents. - // - // Placing a watch simply marks the pages (of the system page size) as - // watched, individual watched ranges (or which specific subscribers are - // watching specific pages) are not stored. Because of this, callbacks may be - // triggered multiple times for a single range, and for any watched page every - // registered callbacks is triggered. This is a very simple one-shot method - // for use primarily for cache invalidation - there may be spurious firing, - // for example, if the game only changes the protection level without writing - // anything. - // - // A range of pages can be watched at any time, but pages are only unwatched - // when watches are triggered (since multiple subscribers can depend on the - // same memory, and one subscriber shouldn't interfere with another). - // - // Callbacks can be triggered for one page (if the guest just stores words) or - // for multiple pages (for file reading, protection level changes). 
- // - // Only guest physical memory mappings are watched - the host-only mapping is - // not protected so it can be used to bypass the write protection (for file - // reads, for example - in this case, watches are triggered manually). - // - // Ranges passed to ProtectAndWatchPhysicalMemory must not contain read-only - // or inaccessible pages - this must be checked externally! Otherwise the MMIO - // handler will make them read-only, but when a read is attempted, it will - // make them read-write! - // - // IMPORTANT NOTE: When a watch is triggered, the watched page is unprotected - // ***ONLY IN THE HEAP WHERE THE ADDRESS IS LOCATED***! Since different - // virtual memory mappings of physical memory can have different protection - // levels for the same pages, and watches must not be placed on read-only or - // totally inaccessible pages, there are significant difficulties with - // synchronizing all the three ranges. - // - // TODO(Triang3l): Allow the callbacks to unwatch regions larger than one page - // (for instance, 64 KB) so there are less access violations. All callbacks - // must agree to unwatch larger ranges because in some cases (like regions - // near the locations that render targets have been resolved to) it is - // necessary to invalidate only a single page and none more. - void* RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback, - void* callback_context); - void UnregisterPhysicalWriteWatch(void* watch_handle); - // Force-protects the range in ***ONE SPECIFIC HEAP***, either 0xA0000000, - // 0xC0000000 or 0xE0000000, depending on the higher bits of the address. - void ProtectAndWatchPhysicalMemory(uint32_t physical_address_and_heap, - uint32_t length); - // Fires and clears any write watches that overlap this range in one heap. // Unprotecting can be inhibited if this is called right before applying // different protection to the same range. - void InvalidateRange(uint32_t physical_address_and_heap, uint32_t length, - bool unprotect = true); + void InvalidateRange(uint32_t physical_address_and_heap, uint32_t length); // Returns true if /all/ of this range is watched. // TODO(Triang3l): Remove when legacy watches are removed. @@ -147,13 +102,10 @@ class MMIOHandler { void* callback_data; }; - struct PhysicalWriteWatchEntry { - PhysicalWriteWatchCallback callback; - void* callback_context; - }; - MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase, - uint8_t* membase_end); + uint8_t* membase_end, + AccessViolationCallback access_violation_callback, + void* access_violation_callback_context); static bool ExceptionCallbackThunk(Exception* ex, void* data); bool ExceptionCallback(Exception* ex); @@ -162,24 +114,18 @@ class MMIOHandler { void ClearAccessWatch(AccessWatchEntry* entry); bool CheckAccessWatch(uint32_t guest_address, uint32_t guest_heap_address); - uint32_t system_page_size_log2_; - uint8_t* virtual_membase_; uint8_t* physical_membase_; uint8_t* memory_end_; std::vector mapped_ranges_; + AccessViolationCallback access_violation_callback_; + void* access_violation_callback_context_; + xe::global_critical_region global_critical_region_; // TODO(benvanik): data structure magic. std::list access_watches_; - std::vector physical_write_watches_; - // For each page, there are 4 bits (16 pages in each word): - // 0 - whether the page is protected in A0000000. - // 1 - whether the page is protected in C0000000. - // 2 - whether the page is protected in E0000000. - // 3 - unused, always zero. 
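The physical_write_watched_pages_ vector being removed here packed sixteen pages into every 64-bit word, four bits per page, with one bit each for the 0xA0000000, 0xC0000000 and 0xE0000000 views and the fourth bit unused. A minimal sketch of that indexing, with hypothetical helper names that are not part of the emulator's API:

```cpp
#include <cstdint>

// heap_index: 0 = 0xA0000000, 1 = 0xC0000000, 2 = 0xE0000000.
constexpr uint32_t WatchWordIndex(uint32_t page_index) {
  return page_index >> 4;  // 16 pages per 64-bit word.
}
constexpr uint32_t WatchBitShift(uint32_t page_index, uint32_t heap_index) {
  return (page_index & 15) * 4 + heap_index;  // 4 bits per page.
}

inline void WatchPage(uint64_t* words, uint32_t page_index,
                      uint32_t heap_index) {
  words[WatchWordIndex(page_index)] |=
      uint64_t(1) << WatchBitShift(page_index, heap_index);
}

inline bool IsPageWatched(const uint64_t* words, uint32_t page_index,
                          uint32_t heap_index) {
  return (words[WatchWordIndex(page_index)] >>
          WatchBitShift(page_index, heap_index)) &
         1;
}
```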
- std::vector physical_write_watched_pages_; static MMIOHandler* global_handler_; }; diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 23caef88e..c79799d0f 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -699,11 +699,11 @@ void* PrimitiveConverter::AllocateIndices( return mapping + simd_offset; } -void PrimitiveConverter::MemoryWriteCallback(uint32_t page_first, - uint32_t page_last) { +void PrimitiveConverter::MemoryWriteCallback(uint32_t physical_address_start, + uint32_t length) { // 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size. - uint32_t bit_index_first = (page_first * system_page_size_) >> 23; - uint32_t bit_index_last = (page_last * system_page_size_) >> 23; + uint32_t bit_index_first = physical_address_start >> 23; + uint32_t bit_index_last = (physical_address_start + length - 1) >> 23; uint64_t bits = ~((1ull << bit_index_first) - 1); if (bit_index_last < 63) { bits &= (1ull << (bit_index_last + 1)) - 1; @@ -711,11 +711,10 @@ void PrimitiveConverter::MemoryWriteCallback(uint32_t page_first, memory_regions_invalidated_ |= bits; } -void PrimitiveConverter::MemoryWriteCallbackThunk(void* context_ptr, - uint32_t page_first, - uint32_t page_last) { +void PrimitiveConverter::MemoryWriteCallbackThunk( + void* context_ptr, uint32_t physical_address_start, uint32_t length) { reinterpret_cast(context_ptr) - ->MemoryWriteCallback(page_first, page_last); + ->MemoryWriteCallback(physical_address_start, length); } D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer( diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index 933e1bd93..79aba99c3 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -89,9 +89,10 @@ class PrimitiveConverter { D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out); // Callback for invalidating buffers mid-frame. 
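PrimitiveConverter's invalidation bitmap works at an 8 MB granularity: the 512 MB physical space is covered by a single 64-bit word, so one bit spans 512 / 64 = 8 MB. A standalone sketch of how a physical range becomes an invalidation mask, mirroring the math in MemoryWriteCallback (the free-function name is assumed):

```cpp
#include <cstdint>

// One bit of the result covers an 8 MB (1 << 23 bytes) region, so the whole
// 512 MB physical space fits in a single uint64_t.
uint64_t MakeInvalidationMask(uint32_t physical_address_start,
                              uint32_t length) {
  if (length == 0) {
    return 0;
  }
  uint32_t bit_first = physical_address_start >> 23;
  uint32_t bit_last = (physical_address_start + length - 1) >> 23;
  uint64_t bits = ~((uint64_t(1) << bit_first) - 1);  // Drop bits below first.
  if (bit_last < 63) {
    bits &= (uint64_t(1) << (bit_last + 1)) - 1;  // Drop bits above last.
  }
  return bits;
}
```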
- void MemoryWriteCallback(uint32_t page_first, uint32_t page_last); - static void MemoryWriteCallbackThunk(void* context_ptr, uint32_t page_first, - uint32_t page_last); + void MemoryWriteCallback(uint32_t physical_address_start, uint32_t length); + static void MemoryWriteCallbackThunk(void* context_ptr, + uint32_t physical_address_start, + uint32_t length); D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 662d7756c..6ad37e2f1 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -172,7 +172,7 @@ SharedMemory::GlobalWatchHandle SharedMemory::RegisterGlobalWatch( watch->callback = callback; watch->callback_context = callback_context; - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); global_watches_.push_back(watch); return reinterpret_cast(watch); @@ -182,7 +182,7 @@ void SharedMemory::UnregisterGlobalWatch(GlobalWatchHandle handle) { auto watch = reinterpret_cast(handle); { - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); auto it = std::find(global_watches_.begin(), global_watches_.end(), watch); assert_false(it == global_watches_.end()); if (it != global_watches_.end()) { @@ -208,7 +208,7 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange( uint32_t bucket_last = watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2; - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); // Allocate the range. WatchRange* range = watch_range_first_free_; @@ -267,7 +267,7 @@ void SharedMemory::UnwatchMemoryRange(WatchHandle handle) { // Could be a zero length range. return; } - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); UnlinkWatchRange(reinterpret_cast(handle)); } @@ -405,7 +405,7 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last, uint32_t bucket_first = address_first >> kWatchBucketSizeLog2; uint32_t bucket_last = address_last >> kWatchBucketSizeLog2; - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); // Fire global watches. 
for (const auto global_watch : global_watches_) { @@ -472,7 +472,7 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first, uint32_t valid_block_last = valid_page_last >> 6; { - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); for (uint32_t i = valid_block_first; i <= valid_block_last; ++i) { uint64_t valid_bits = UINT64_MAX; @@ -523,7 +523,7 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first, uint32_t request_block_first = request_page_first >> 6; uint32_t request_block_last = request_page_last >> 6; - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); uint32_t range_start = UINT32_MAX; for (uint32_t i = request_block_first; i <= request_block_last; ++i) { @@ -570,18 +570,23 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first, } void SharedMemory::MemoryWriteCallbackThunk(void* context_ptr, - uint32_t page_first, - uint32_t page_last) { + uint32_t physical_address_start, + uint32_t length) { reinterpret_cast(context_ptr) - ->MemoryWriteCallback(page_first, page_last); + ->MemoryWriteCallback(physical_address_start, length); } -void SharedMemory::MemoryWriteCallback(uint32_t page_first, - uint32_t page_last) { +void SharedMemory::MemoryWriteCallback(uint32_t physical_address_start, + uint32_t length) { + if (length == 0) { + return; + } + uint32_t page_first = physical_address_start >> page_size_log2_; + uint32_t page_last = (physical_address_start + length - 1) >> page_size_log2_; uint32_t block_first = page_first >> 6; uint32_t block_last = page_last >> 6; - std::lock_guard lock(validity_mutex_); + auto global_lock = global_critical_region_.Acquire(); for (uint32_t i = block_first; i <= block_last; ++i) { uint64_t invalidate_bits = UINT64_MAX; diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index c1c27419a..2a6b78c71 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -14,6 +14,7 @@ #include #include +#include "xenia/base/mutex.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_api.h" #include "xenia/ui/d3d12/pools.h" @@ -73,7 +74,7 @@ class SharedMemory { // additional subsystem/object-specific data (such as whether the range // belongs to the base mip level or to the rest of the mips). // - // The callback is called with the mutex locked. Do NOT watch or unwatch + // Called with the global critical region locked. Do NOT watch or unwatch // ranges from within it! The watch for the callback is cancelled after the // callback - the handle becomes invalid. WatchHandle WatchMemoryRange(uint32_t start, uint32_t length, @@ -83,8 +84,9 @@ class SharedMemory { void UnwatchMemoryRange(WatchHandle handle); // Locks the mutex that gets locked when watch callbacks are invoked - must be // done when checking variables that may be changed by a watch callback. - inline void LockWatchMutex() { validity_mutex_.lock(); } - inline void UnlockWatchMutex() { validity_mutex_.unlock(); } + inline std::unique_lock LockWatchMutex() { + return global_critical_region_.Acquire(); + } // Ensures the buffer tiles backing the range are resident, but doesn't upload // anything. @@ -125,8 +127,7 @@ class SharedMemory { private: bool AreTiledResourcesUsed() const; - // Mark the memory range as updated and protect it. The validity mutex must - // NOT be held when calling!!! + // Mark the memory range as updated and protect it. 
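The shared-memory validity bookkeeping here (MakeRangeValid, GetRangesToUpload, MemoryWriteCallback), and later the new WatchPhysicalWrite/TriggerWatches in memory.cc, all use the same pattern: 64 pages per uint64_t, with the first and last partially covered words masked. A sketch of that masking as a free function over a plain bit vector (a hypothetical helper, not the class interface):

```cpp
#include <cstdint>
#include <vector>

// Sets every bit in [page_first, page_last] of a bitmap storing 64 pages per
// word; only the first and last words may be partially covered.
void SetPageRange(std::vector<uint64_t>& bitmap, uint32_t page_first,
                  uint32_t page_last) {
  uint32_t block_first = page_first >> 6;
  uint32_t block_last = page_last >> 6;
  for (uint32_t i = block_first; i <= block_last; ++i) {
    uint64_t bits = UINT64_MAX;
    if (i == block_first) {
      bits &= ~((uint64_t(1) << (page_first & 63)) - 1);
    }
    if (i == block_last && (page_last & 63) != 63) {
      // A shift by 64 would be undefined, hence the != 63 check.
      bits &= (uint64_t(1) << ((page_last & 63) + 1)) - 1;
    }
    bitmap[i] |= bits;
  }
}
```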
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count); D3D12CommandProcessor* command_processor_; @@ -175,19 +176,20 @@ class SharedMemory { // Mutex between the exception handler and the command processor, to be locked // when checking or updating validity of pages/ranges. - std::recursive_mutex validity_mutex_; + xe::global_critical_region global_critical_region_; // *************************************************************************** - // Things below should be protected by validity_mutex_. + // Things below should be protected by global_critical_region. // *************************************************************************** // Bit vector containing whether physical memory system pages are up to date. std::vector valid_pages_; // Memory access callback. - static void MemoryWriteCallbackThunk(void* context_ptr, uint32_t page_first, - uint32_t page_last); - void MemoryWriteCallback(uint32_t page_first, uint32_t page_last); + static void MemoryWriteCallbackThunk(void* context_ptr, + uint32_t physical_address_start, + uint32_t length); + void MemoryWriteCallback(uint32_t physical_address_start, uint32_t length); struct GlobalWatch { GlobalWatchCallback callback; @@ -249,7 +251,7 @@ class SharedMemory { void UnlinkWatchRange(WatchRange* range); // *************************************************************************** - // Things above should be protected by validity_mutex_. + // Things above should be protected by global_critical_region. // *************************************************************************** // First page and length in pages. diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 54d6d2f01..564456765 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -1267,7 +1267,7 @@ void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled, uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12; uint32_t block_first = page_first >> 5; uint32_t block_last = page_last >> 5; - shared_memory_->LockWatchMutex(); + auto watch_lock = shared_memory_->LockWatchMutex(); for (uint32_t i = block_first; i <= block_last; ++i) { uint32_t add_bits = UINT32_MAX; if (i == block_first) { @@ -1279,7 +1279,6 @@ void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled, scaled_resolve_pages_[i] |= add_bits; scaled_resolve_pages_l2_[i >> 6] |= 1ull << (i & 63); } - shared_memory_->UnlockWatchMutex(); } // Invalidate textures. Toggling individual textures between scaled and @@ -1970,10 +1969,12 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { bool TextureCache::LoadTextureData(Texture* texture) { // See what we need to upload. - shared_memory_->LockWatchMutex(); - bool base_in_sync = texture->base_in_sync; - bool mips_in_sync = texture->mips_in_sync; - shared_memory_->UnlockWatchMutex(); + bool base_in_sync, mips_in_sync; + { + auto watch_lock = shared_memory_->LockWatchMutex(); + base_in_sync = texture->base_in_sync; + mips_in_sync = texture->mips_in_sync; + } if (base_in_sync && mips_in_sync) { return true; } @@ -2235,20 +2236,21 @@ bool TextureCache::LoadTextureData(Texture* texture) { // resolves as well to detect when the CPU wants to reuse the memory for a // regular texture or a vertex buffer, and thus the scaled resolve version is // not up to date anymore. 
- shared_memory_->LockWatchMutex(); - texture->base_in_sync = true; - texture->mips_in_sync = true; - if (!base_in_sync) { - texture->base_watch_handle = shared_memory_->WatchMemoryRange( - texture->key.base_page << 12, texture->base_size, WatchCallbackThunk, - this, texture, 0); + { + auto watch_lock = shared_memory_->LockWatchMutex(); + texture->base_in_sync = true; + texture->mips_in_sync = true; + if (!base_in_sync) { + texture->base_watch_handle = shared_memory_->WatchMemoryRange( + texture->key.base_page << 12, texture->base_size, WatchCallbackThunk, + this, texture, 0); + } + if (!mips_in_sync) { + texture->mip_watch_handle = shared_memory_->WatchMemoryRange( + texture->key.mip_page << 12, texture->mip_size, WatchCallbackThunk, + this, texture, 1); + } } - if (!mips_in_sync) { - texture->mip_watch_handle = shared_memory_->WatchMemoryRange( - texture->key.mip_page << 12, texture->mip_size, WatchCallbackThunk, - this, texture, 1); - } - shared_memory_->UnlockWatchMutex(); LogTextureAction(texture, "Loaded"); return true; @@ -2325,7 +2327,7 @@ bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled, uint32_t block_last = page_last >> 5; uint32_t l2_block_first = block_first >> 6; uint32_t l2_block_last = block_last >> 6; - shared_memory_->LockWatchMutex(); + auto watch_lock = shared_memory_->LockWatchMutex(); for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) { uint64_t l2_block = scaled_resolve_pages_l2_[i]; if (i == l2_block_first) { @@ -2346,12 +2348,10 @@ bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled, check_bits &= (1u << ((page_last & 31) + 1)) - 1; } if (scaled_resolve_pages_[block_index] & check_bits) { - shared_memory_->UnlockWatchMutex(); return true; } } } - shared_memory_->UnlockWatchMutex(); return false; } diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 2706fdd94..5053e4b23 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -96,6 +96,10 @@ Memory::~Memory() { // requests. mmio_handler_.reset(); + for (auto physical_write_watch : physical_write_watches_) { + delete physical_write_watch; + } + heaps_.v00000000.Dispose(); heaps_.v40000000.Dispose(); heaps_.v80000000.Dispose(); @@ -152,24 +156,27 @@ bool Memory::Initialize() { physical_membase_ = mapping_base_ + 0x100000000ull; // Prepare virtual heaps. - heaps_.v00000000.Initialize(virtual_membase_, 0x00000000, 0x40000000, 4096); - heaps_.v40000000.Initialize(virtual_membase_, 0x40000000, + heaps_.v00000000.Initialize(this, virtual_membase_, 0x00000000, 0x40000000, + 4096); + heaps_.v40000000.Initialize(this, virtual_membase_, 0x40000000, 0x40000000 - 0x01000000, 64 * 1024); - heaps_.v80000000.Initialize(virtual_membase_, 0x80000000, 0x10000000, + heaps_.v80000000.Initialize(this, virtual_membase_, 0x80000000, 0x10000000, 64 * 1024); - heaps_.v90000000.Initialize(virtual_membase_, 0x90000000, 0x10000000, 4096); + heaps_.v90000000.Initialize(this, virtual_membase_, 0x90000000, 0x10000000, + 4096); // Prepare physical heaps. - heaps_.physical.Initialize(physical_membase_, 0x00000000, 0x20000000, 4096); + heaps_.physical.Initialize(this, physical_membase_, 0x00000000, 0x20000000, + 4096); // HACK: should be 64k, but with us overlaying A and E it needs to be 4. 
- /*heaps_.vA0000000.Initialize(virtual_membase_, 0xA0000000, 0x20000000, + /*heaps_.vA0000000.Initialize(this, virtual_membase_, 0xA0000000, 0x20000000, 64 * 1024, &heaps_.physical);*/ - heaps_.vA0000000.Initialize(virtual_membase_, 0xA0000000, 0x20000000, + heaps_.vA0000000.Initialize(this, virtual_membase_, 0xA0000000, 0x20000000, 4 * 1024, &heaps_.physical); - heaps_.vC0000000.Initialize(virtual_membase_, 0xC0000000, 0x20000000, + heaps_.vC0000000.Initialize(this, virtual_membase_, 0xC0000000, 0x20000000, 16 * 1024 * 1024, &heaps_.physical); - heaps_.vE0000000.Initialize(virtual_membase_, 0xE0000000, 0x1FD00000, 4096, - &heaps_.physical); + heaps_.vE0000000.Initialize(this, virtual_membase_, 0xE0000000, 0x1FD00000, + 4096, &heaps_.physical); // Protect the first and last 64kb of memory. heaps_.v00000000.AllocFixed( @@ -189,7 +196,8 @@ bool Memory::Initialize() { // Add handlers for MMIO. mmio_handler_ = cpu::MMIOHandler::Install(virtual_membase_, physical_membase_, - physical_membase_ + 0x1FFFFFFF); + physical_membase_ + 0x1FFFFFFF, + AccessViolationCallbackThunk, this); if (!mmio_handler_) { XELOGE("Unable to install MMIO handlers"); assert_always(); @@ -397,6 +405,46 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) { return mmio_handler_->LookupRange(virtual_address); } +bool Memory::AccessViolationCallback(size_t host_address, bool is_write) { + if (!is_write) { + // TODO(Triang3l): Handle GPU readback. + return false; + } + + // Access via physical_membase_ is special, when need to bypass everything, + // so only watching virtual memory regions. + if (host_address < reinterpret_cast(virtual_membase_) || + host_address >= reinterpret_cast(physical_membase_)) { + return false; + } + + uint32_t virtual_address = + uint32_t(reinterpret_cast(host_address) - virtual_membase_); + // Revert the adjustment made by CPU emulation. 
+ if (virtual_address >= 0xE0000000) { + if (virtual_address < 0xE0001000) { + return false; + } + virtual_address -= 0x1000; + } + + BaseHeap* heap = LookupHeap(virtual_address); + if (heap == &heaps_.vA0000000 || heap == &heaps_.vC0000000 || + heap == &heaps_.vE0000000) { + return static_cast(heap)->TriggerWatches( + virtual_address / system_page_size_ * system_page_size_, + system_page_size_, is_write); + } + + return false; +} + +bool Memory::AccessViolationCallbackThunk(void* context, size_t host_address, + bool is_write) { + return reinterpret_cast(context)->AccessViolationCallback( + host_address, is_write); +} + uintptr_t Memory::AddPhysicalAccessWatch(uint32_t physical_address, uint32_t length, cpu::MMIOHandler::WatchType type, @@ -412,21 +460,38 @@ void Memory::CancelAccessWatch(uintptr_t watch_handle) { mmio_handler_->CancelAccessWatch(watch_handle); } -void* Memory::RegisterPhysicalWriteWatch( - cpu::PhysicalWriteWatchCallback callback, void* callback_context) { - return mmio_handler_->RegisterPhysicalWriteWatch(callback, callback_context); +void* Memory::RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback, + void* callback_context) { + PhysicalWriteWatchEntry* entry = new PhysicalWriteWatchEntry; + entry->callback = callback; + entry->callback_context = callback_context; + + auto lock = global_critical_region_.Acquire(); + physical_write_watches_.push_back(entry); + + return entry; } void Memory::UnregisterPhysicalWriteWatch(void* watch_handle) { - mmio_handler_->UnregisterPhysicalWriteWatch(watch_handle); + auto entry = reinterpret_cast(watch_handle); + { + auto lock = global_critical_region_.Acquire(); + auto it = std::find(physical_write_watches_.begin(), + physical_write_watches_.end(), entry); + assert_false(it == physical_write_watches_.end()); + if (it != physical_write_watches_.end()) { + physical_write_watches_.erase(it); + } + } + delete entry; } void Memory::WatchPhysicalMemoryWrite(uint32_t physical_address, uint32_t length) { // Watch independently in all three mappings. - heaps_.vA0000000.WatchWrite(physical_address, length, mmio_handler_.get()); - heaps_.vC0000000.WatchWrite(physical_address, length, mmio_handler_.get()); - heaps_.vE0000000.WatchWrite(physical_address, length, mmio_handler_.get()); + heaps_.vA0000000.WatchPhysicalWrite(physical_address, length); + heaps_.vC0000000.WatchPhysicalWrite(physical_address, length); + heaps_.vE0000000.WatchPhysicalWrite(physical_address, length); } uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, @@ -536,8 +601,9 @@ BaseHeap::BaseHeap() BaseHeap::~BaseHeap() = default; -void BaseHeap::Initialize(uint8_t* membase, uint32_t heap_base, +void BaseHeap::Initialize(Memory* memory, uint8_t* membase, uint32_t heap_base, uint32_t heap_size, uint32_t page_size) { + memory_ = memory; membase_ = membase; heap_base_ = heap_base; heap_size_ = heap_size - 1; @@ -710,6 +776,7 @@ bool BaseHeap::Restore(ByteStream* stream) { void BaseHeap::Reset() { // TODO(DrChat): protect pages. std::memset(page_table_.data(), 0, sizeof(PageEntry) * page_table_.size()); + // TODO(Triang3l): Unwatch pages. 
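Memory::AccessViolationCallback above has to map the faulting host address back to a guest virtual address before the owning heap can be looked up, undoing the 0x1000-byte adjustment that CPU emulation applies to accesses through the 0xE0000000 view, and the result is then rounded down to a whole system page for TriggerWatches. A self-contained sketch of that translation (the function and parameter names are assumptions):

```cpp
#include <cstdint>

// Maps a faulting host address back to the guest virtual page it belongs to,
// reverting the 0x1000-byte adjustment made by CPU emulation for the
// 0xE0000000 view. Returns false if the address is outside the guest virtual
// range.
bool HostToGuestWatchedPage(uintptr_t host_address, uintptr_t virtual_membase,
                            uintptr_t physical_membase,
                            uint32_t system_page_size,
                            uint32_t* out_page_address) {
  if (host_address < virtual_membase || host_address >= physical_membase) {
    return false;  // Physical-view or unrelated host access, not watched here.
  }
  uint32_t virtual_address = uint32_t(host_address - virtual_membase);
  if (virtual_address >= 0xE0000000) {
    if (virtual_address < 0xE0001000) {
      return false;  // Inside the offset gap, no guest page maps here.
    }
    virtual_address -= 0x1000;  // Revert the adjustment made by CPU emulation.
  }
  *out_page_address = virtual_address / system_page_size * system_page_size;
  return true;
}
```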
} bool BaseHeap::Alloc(uint32_t size, uint32_t alignment, @@ -1207,20 +1274,32 @@ VirtualHeap::VirtualHeap() = default; VirtualHeap::~VirtualHeap() = default; -void VirtualHeap::Initialize(uint8_t* membase, uint32_t heap_base, - uint32_t heap_size, uint32_t page_size) { - BaseHeap::Initialize(membase, heap_base, heap_size, page_size); +void VirtualHeap::Initialize(Memory* memory, uint8_t* membase, + uint32_t heap_base, uint32_t heap_size, + uint32_t page_size) { + BaseHeap::Initialize(memory, membase, heap_base, heap_size, page_size); } PhysicalHeap::PhysicalHeap() : parent_heap_(nullptr) {} PhysicalHeap::~PhysicalHeap() = default; -void PhysicalHeap::Initialize(uint8_t* membase, uint32_t heap_base, - uint32_t heap_size, uint32_t page_size, - VirtualHeap* parent_heap) { - BaseHeap::Initialize(membase, heap_base, heap_size, page_size); +void PhysicalHeap::Initialize(Memory* memory, uint8_t* membase, + uint32_t heap_base, uint32_t heap_size, + uint32_t page_size, VirtualHeap* parent_heap) { + BaseHeap::Initialize(memory, membase, heap_base, heap_size, page_size); parent_heap_ = parent_heap; + system_page_size_ = uint32_t(xe::memory::page_size()); + + // Include the 0xE0000000 mapping offset because these bits are for host OS + // pages. + system_page_count_ = + (heap_size_ /* already - 1 */ + (heap_base >= 0xE0000000 ? 0x1000 : 0) + + system_page_size_) / + system_page_size_; + system_pages_watched_write_.resize((system_page_count_ + 63) / 64); + std::memset(system_pages_watched_write_.data(), 0, + system_pages_watched_write_.size() * sizeof(uint64_t)); } bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment, @@ -1362,8 +1441,11 @@ bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) { uint32_t parent_base_address = GetPhysicalAddress(base_address); uint32_t region_size = 0; if (QuerySize(base_address, ®ion_size)) { - cpu::MMIOHandler::global_handler()->InvalidateRange( - base_address, region_size, !FLAGS_protect_on_release); + // TODO(Triang3l): Remove InvalidateRange when legacy (old Vulkan renderer) + // watches are removed. + cpu::MMIOHandler::global_handler()->InvalidateRange(base_address, + region_size); + TriggerWatches(base_address, region_size, true, !FLAGS_protect_on_release); } if (!parent_heap_->Release(parent_base_address, out_region_size)) { @@ -1378,7 +1460,10 @@ bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect) { auto global_lock = global_critical_region_.Acquire(); - cpu::MMIOHandler::global_handler()->InvalidateRange(address, size, false); + // TODO(Triang3l): Remove InvalidateRange when legacy (old Vulkan renderer) + // watches are removed. + cpu::MMIOHandler::global_handler()->InvalidateRange(address, size); + TriggerWatches(address, size, true, false); if (!parent_heap_->Protect(GetPhysicalAddress(address), size, protect, old_protect)) { @@ -1389,47 +1474,219 @@ bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, return BaseHeap::Protect(address, size, protect); } -void PhysicalHeap::WatchWrite(uint32_t address, uint32_t size, - cpu::MMIOHandler* mmio_handler) { - address &= 0x1FFFFFFF; - if (address >= heap_size_) { - // E0000000 is not exactly 512 MB long. 
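Both the new WatchPhysicalWrite and TriggerWatches begin the same way: the incoming range is clamped to the heap's address window (the physical window in WatchPhysicalWrite, the virtual one in TriggerWatches), keeping in mind that heap_size_ is stored as size minus one and that the 0xE0000000 heap covers only 0x1FD00000 bytes rather than a full 512 MB. A standalone sketch of that clamping, with assumed parameter names:

```cpp
#include <algorithm>
#include <cstdint>

// Clamps [*address, *address + *length) to [base, base + size). Returns false
// if nothing of the range lies inside the heap. "size" is the true size here;
// the heaps themselves store size - 1.
bool ClampRangeToHeap(uint32_t base, uint32_t size, uint32_t* address,
                      uint32_t* length) {
  if (*address < base) {
    if (base - *address >= *length) {
      return false;  // Entirely below the heap.
    }
    *length -= base - *address;
    *address = base;
  }
  uint32_t relative_address = *address - base;
  if (relative_address >= size) {
    return false;  // Entirely above the heap.
  }
  *length = std::min(*length, size - relative_address);
  return *length != 0;
}
```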
+void PhysicalHeap::WatchPhysicalWrite(uint32_t physical_address, + uint32_t length) { + uint32_t physical_address_offset = GetPhysicalAddress(heap_base_); + if (physical_address < physical_address_offset) { + if (physical_address_offset - physical_address >= length) { + return; + } + length -= physical_address_offset - physical_address; + physical_address = physical_address_offset; + } + uint32_t heap_relative_address = physical_address - physical_address_offset; + if (heap_relative_address >= heap_size_ + 1) { return; } - size = std::min(size, heap_size_ - address); - if (size == 0) { + length = std::min(length, heap_size_ + 1 - heap_relative_address); + if (length == 0) { return; } - uint32_t system_page_size = uint32_t(xe::memory::page_size()); - uint32_t system_page_first = address / system_page_size; - uint32_t system_page_last = (address + size - 1) / system_page_size; + // Include the 0xE0000000 mapping offset because watches are placed on OS + // pages. + uint32_t system_address_offset = heap_base_ >= 0xE0000000 ? 0x1000 : 0; + uint32_t system_page_first = + (heap_relative_address + system_address_offset) / system_page_size_; + uint32_t system_page_last = + (heap_relative_address + length - 1 + system_address_offset) / + system_page_size_; + system_page_last = std::min(system_page_last, system_page_count_ - 1); + assert_true(system_page_first <= system_page_last); + uint32_t block_index_first = system_page_first >> 6; + uint32_t block_index_last = system_page_last >> 6; auto global_lock = global_critical_region_.Acquire(); - // Watch all writable pages of the system page size within the requested - // range. - uint32_t range_start = UINT32_MAX; + // Protect the pages. + uint8_t* protect_base = membase_ + heap_base_; + uint32_t protect_system_page_first = UINT32_MAX; for (uint32_t i = system_page_first; i <= system_page_last; ++i) { - if (page_table_[i * system_page_size / page_size_].current_protect & - kMemoryProtectWrite) { - if (range_start == UINT32_MAX) { - range_start = i; + // Check if need to allow writing to this page. + bool protect_page = + (system_pages_watched_write_[i >> 6] & (uint64_t(1) << (i & 63))) == 0; + if (protect_page) { + uint32_t page_number = + xe::sat_sub(i * system_page_size_, system_address_offset) / + page_size_; + if (ToPageAccess(page_table_[page_number].current_protect) != + xe::memory::PageAccess::kReadWrite) { + protect_page = false; + } + } + if (protect_page) { + if (protect_system_page_first == UINT32_MAX) { + protect_system_page_first = i; } } else { - if (range_start != UINT32_MAX) { - mmio_handler->ProtectAndWatchPhysicalMemory( - heap_base_ + range_start * system_page_size, - (i - range_start) * system_page_size); - range_start = UINT32_MAX; + if (protect_system_page_first != UINT32_MAX) { + xe::memory::Protect( + protect_base + protect_system_page_first * system_page_size_, + (i - protect_system_page_first) * system_page_size_, + xe::memory::PageAccess::kReadOnly); + protect_system_page_first = UINT32_MAX; } } } - if (range_start != UINT32_MAX) { - mmio_handler->ProtectAndWatchPhysicalMemory( - heap_base_ + range_start * system_page_size, - (system_page_last - range_start + 1) * system_page_size); + if (protect_system_page_first != UINT32_MAX) { + xe::memory::Protect( + protect_base + protect_system_page_first * system_page_size_, + (system_page_last + 1 - protect_system_page_first) * system_page_size_, + xe::memory::PageAccess::kReadOnly); + } + + // Register the pages as watched. 
+ for (uint32_t i = block_index_first; i <= block_index_last; ++i) { + uint64_t mask = UINT64_MAX; + if (i == block_index_first) { + mask &= ~((uint64_t(1) << (system_page_first & 63)) - 1); + } + if (i == block_index_last && (system_page_last & 63) != 63) { + mask &= (uint64_t(1) << ((system_page_last & 63) + 1)) - 1; + } + system_pages_watched_write_[i] |= mask; } } +bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length, + bool is_write, bool unprotect) { + // TODO(Triang3l): Support read watches. + assert_true(is_write); + if (!is_write) { + return false; + } + + if (virtual_address < heap_base_) { + if (heap_base_ - virtual_address >= length) { + return false; + } + length -= heap_base_ - virtual_address; + virtual_address = heap_base_; + } + uint32_t heap_relative_address = virtual_address - heap_base_; + if (heap_relative_address >= heap_size_ + 1) { + return false; + } + length = std::min(length, heap_size_ + 1 - heap_relative_address); + if (length == 0) { + return false; + } + + // Include the 0xE0000000 mapping offset because watches are placed on OS + // pages. + uint32_t system_address_offset = heap_base_ >= 0xE0000000 ? 0x1000 : 0; + uint32_t system_page_first = + (heap_relative_address + system_address_offset) / system_page_size_; + uint32_t system_page_last = + (heap_relative_address + length - 1 + system_address_offset) / + system_page_size_; + system_page_last = std::min(system_page_last, system_page_count_ - 1); + assert_true(system_page_first <= system_page_last); + uint32_t block_index_first = system_page_first >> 6; + uint32_t block_index_last = system_page_last >> 6; + + auto global_lock = global_critical_region_.Acquire(); + + // Check if watching any page, whether need to call the callback at all. + bool any_watched = false; + for (uint32_t i = block_index_first; i <= block_index_last; ++i) { + uint64_t block = system_pages_watched_write_[i]; + if (i == block_index_first) { + block &= ~((uint64_t(1) << (system_page_first & 63)) - 1); + } + if (i == block_index_last && (system_page_last & 63) != 63) { + block &= (uint64_t(1) << ((system_page_last & 63) + 1)) - 1; + } + if (block) { + any_watched = true; + break; + } + } + if (!any_watched) { + return false; + } + + // Trigger callbacks. + // TODO(Triang3l): Accumulate the range that is safe to unwatch from the + // callbacks. + uint32_t physical_address_offset = GetPhysicalAddress(heap_base_); + uint32_t physical_address_start = + xe::sat_sub(system_page_first * system_page_size_, + system_address_offset) + + physical_address_offset; + uint32_t physical_length = std::min( + xe::sat_sub(system_page_last * system_page_size_ + system_page_size_, + system_address_offset) + + physical_address_offset - physical_address_start, + heap_size_ + 1 - (physical_address_start - physical_address_offset)); + for (auto physical_write_watch : memory_->physical_write_watches_) { + physical_write_watch->callback(physical_write_watch->callback_context, + physical_address_start, physical_length); + } + + // Unprotect ranges that need unprotection. + if (unprotect) { + uint8_t* protect_base = membase_ + heap_base_; + uint32_t unprotect_system_page_first = UINT32_MAX; + for (uint32_t i = system_page_first; i <= system_page_last; ++i) { + // Check if need to allow writing to this page. 
+ bool unprotect_page = (system_pages_watched_write_[i >> 6] & + (uint64_t(1) << (i & 63))) != 0; + if (unprotect_page) { + uint32_t page_number = + xe::sat_sub(i * system_page_size_, system_address_offset) / + page_size_; + if (ToPageAccess(page_table_[page_number].current_protect) != + xe::memory::PageAccess::kReadWrite) { + unprotect_page = false; + } + } + if (unprotect_page) { + if (unprotect_system_page_first == UINT32_MAX) { + unprotect_system_page_first = i; + } + } else { + if (unprotect_system_page_first != UINT32_MAX) { + xe::memory::Protect( + protect_base + unprotect_system_page_first * system_page_size_, + (i - unprotect_system_page_first) * system_page_size_, + xe::memory::PageAccess::kReadWrite); + unprotect_system_page_first = UINT32_MAX; + } + } + } + if (unprotect_system_page_first != UINT32_MAX) { + xe::memory::Protect( + protect_base + unprotect_system_page_first * system_page_size_, + (system_page_last + 1 - unprotect_system_page_first) * + system_page_size_, + xe::memory::PageAccess::kReadWrite); + } + } + + // Mark pages as not write-watched. + for (uint32_t i = block_index_first; i <= block_index_last; ++i) { + uint64_t mask = 0; + if (i == block_index_first) { + mask |= (uint64_t(1) << (system_page_first & 63)) - 1; + } + if (i == block_index_last && (system_page_last & 63) != 63) { + mask |= ~((uint64_t(1) << ((system_page_last & 63) + 1)) - 1); + } + system_pages_watched_write_[i] &= mask; + } + + return true; +} + } // namespace xe diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 94059b15c..8f6431ba3 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -25,6 +25,8 @@ class ByteStream; namespace xe { +class Memory; + enum SystemHeapFlag : uint32_t { kSystemHeapVirtual = 1 << 0, kSystemHeapPhysical = 1 << 1, @@ -163,9 +165,10 @@ class BaseHeap { protected: BaseHeap(); - void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, - uint32_t page_size); + void Initialize(Memory* memory, uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size); + Memory* memory_; uint8_t* membase_; uint32_t heap_base_; uint32_t heap_size_; @@ -181,8 +184,8 @@ class VirtualHeap : public BaseHeap { ~VirtualHeap() override; // Initializes the heap properties and allocates the page table. - void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, - uint32_t page_size); + void Initialize(Memory* memory, uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size); }; // A heap for ranges of memory that are mapped to physical ranges. @@ -198,8 +201,9 @@ class PhysicalHeap : public BaseHeap { ~PhysicalHeap() override; // Initializes the heap properties and allocates the page table. - void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, - uint32_t page_size, VirtualHeap* parent_heap); + void Initialize(Memory* memory, uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size, + VirtualHeap* parent_heap); bool Alloc(uint32_t size, uint32_t alignment, uint32_t allocation_type, uint32_t protect, bool top_down, uint32_t* out_address) override; @@ -215,11 +219,18 @@ class PhysicalHeap : public BaseHeap { bool Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect = nullptr) override; - void WatchWrite(uint32_t address, uint32_t size, - cpu::MMIOHandler* mmio_handler); + void WatchPhysicalWrite(uint32_t physical_address, uint32_t length); + // Returns true if any page in the range was watched. 
+ bool TriggerWatches(uint32_t virtual_address, uint32_t length, bool is_write, + bool unprotect = true); protected: VirtualHeap* parent_heap_; + + uint32_t system_page_size_; + uint32_t system_page_count_; + // Protected by global_critical_region. + std::vector system_pages_watched_write_; }; // Models the entire guest memory system on the console. @@ -322,9 +333,46 @@ class Memory { // Cancels a write watch requested with AddPhysicalAccessWatch. void CancelAccessWatch(uintptr_t watch_handle); - // Registers a global callback for physical memory writes. See - // cpu/mmio_handler.h for more details about physical memory write watches. - void* RegisterPhysicalWriteWatch(cpu::PhysicalWriteWatchCallback callback, + typedef void (*PhysicalWriteWatchCallback)(void* context_ptr, + uint32_t physical_address_start, + uint32_t length); + + // Physical memory write watching, allowing subsystems to invalidate cached + // data that depends on memory contents. + // + // Placing a watch simply marks the pages (of the system page size) as + // watched, individual watched ranges (or which specific subscribers are + // watching specific pages) are not stored. Because of this, callbacks may be + // triggered multiple times for a single range, and for any watched page every + // registered callbacks is triggered. This is a very simple one-shot method + // for use primarily for cache invalidation - there may be spurious firing, + // for example, if the game only changes the protection level without writing + // anything. + // + // A range of pages can be watched at any time, but pages are only unwatched + // when watches are triggered (since multiple subscribers can depend on the + // same memory, and one subscriber shouldn't interfere with another). + // + // Callbacks can be triggered for one page (if the guest just stores words) or + // for multiple pages (for file reading, protection level changes). + // + // Only guest physical memory mappings are watched - the host-only mapping is + // not protected so it can be used to bypass the write protection (for file + // reads, for example - in this case, watches are triggered manually). + // + // Note that when a watch is triggered, the watched page is unprotected only + // in the heap where the address is located. Since different virtual memory + // mappings of physical memory can have different protection levels for the + // same pages, and watches must not be placed on read-only or totally + // inaccessible pages, there are significant difficulties with synchronizing + // all the three ranges, but it's generally not needed. + // + // TODO(Triang3l): Allow the callbacks to unwatch regions larger than one page + // (for instance, 64 KB) so there are less access violations. All callbacks + // must agree to unwatch larger ranges because in some cases (like regions + // near the locations that render targets have been resolved to) it is + // necessary to invalidate only a single page and none more. + void* RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback, void* callback_context); // Unregisters a physical memory write watch previously added with @@ -336,11 +384,8 @@ class Memory { // protection of it changes), the registered watch callbacks are triggered for // the page (or pages, for file reads and protection changes) where something // has been written to. This protects physical memory only under - // virtual_membase_, so writing to physical_membase_ can be written to bypass - // the protection placed by the watches. 
Read-only and inaccessible pages are - // silently ignored, only attempts to write to read-write pages will trigger - // watch callbacks. - // AVOID CALLING WITH MUTEXES LOCKED!!! + // virtual_membase_, so writing to physical_membase_ can be done to bypass the + // protection placed by the watches. void WatchPhysicalMemoryWrite(uint32_t physical_address, uint32_t length); // Allocates virtual memory from the 'system' heap. @@ -372,6 +417,10 @@ class Memory { int MapViews(uint8_t* mapping_base); void UnmapViews(); + bool AccessViolationCallback(size_t host_address, bool is_write); + static bool AccessViolationCallbackThunk(void* context, size_t host_address, + bool is_write); + private: std::wstring file_name_; uint32_t system_page_size_ = 0; @@ -410,6 +459,14 @@ class Memory { } heaps_; friend class BaseHeap; + + friend class PhysicalHeap; + struct PhysicalWriteWatchEntry { + PhysicalWriteWatchCallback callback; + void* callback_context; + }; + xe::global_critical_region global_critical_region_; + std::vector physical_write_watches_; }; } // namespace xe
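With these pieces in place, a subsystem registers one callback with Memory and re-arms watches per range; when the guest writes to a watched page, the access violation travels from MMIOHandler::ExceptionCallback through Memory::AccessViolationCallback into PhysicalHeap::TriggerWatches, which fires the callback with the physical start and length before unprotecting and unwatching the pages. A usage sketch under those assumptions (the cache type and its behaviour are hypothetical, the Memory calls are the ones added in this patch):

```cpp
#include <cstdint>

#include "xenia/memory.h"

// Hypothetical cache that needs to know when guest code overwrites physical
// memory it has copied elsewhere.
class PhysicalRangeCache {
 public:
  explicit PhysicalRangeCache(xe::Memory* memory) : memory_(memory) {
    watch_handle_ = memory_->RegisterPhysicalWriteWatch(
        &PhysicalRangeCache::OnPhysicalWriteThunk, this);
  }
  ~PhysicalRangeCache() {
    memory_->UnregisterPhysicalWriteWatch(watch_handle_);
  }

  void Upload(uint32_t physical_address, uint32_t length) {
    // ... copy the guest data somewhere ...
    // Re-arm write protection so the next guest write triggers the callback.
    memory_->WatchPhysicalMemoryWrite(physical_address, length);
  }

 private:
  static void OnPhysicalWriteThunk(void* context_ptr,
                                   uint32_t physical_address_start,
                                   uint32_t length) {
    // Called with the global critical region held - only invalidate here and
    // defer re-watching to the next use of the range.
    static_cast<PhysicalRangeCache*>(context_ptr)
        ->Invalidate(physical_address_start, length);
  }
  void Invalidate(uint32_t physical_address_start, uint32_t length) {
    // Mark the overlapping cached data as stale; it is re-uploaded (and
    // re-watched) the next time it is needed.
  }

  xe::Memory* memory_;
  void* watch_handle_;
};
```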