From 4f7edff19de8cc254269318c69315dab110084a8 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Thu, 26 Jul 2018 22:52:26 +0300
Subject: [PATCH] [D3D12] SHM: Watches prototype, some uploading

---
 src/xenia/cpu/mmio_handler.cc                 | 59 ++++++++++++
 src/xenia/cpu/mmio_handler.h                  |  9 ++
 .../gpu/d3d12/d3d12_command_processor.cc      |  1 +
 src/xenia/gpu/d3d12/shared_memory.cc          | 95 +++++++++++++++++--
 src/xenia/gpu/d3d12/shared_memory.h           |  3 +
 src/xenia/memory.cc                           | 15 +++
 src/xenia/memory.h                            | 16 ++++
 7 files changed, 192 insertions(+), 6 deletions(-)

diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc
index 33bdaf889..f691c287e 100644
--- a/src/xenia/cpu/mmio_handler.cc
+++ b/src/xenia/cpu/mmio_handler.cc
@@ -214,6 +214,58 @@ void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) {
   delete entry;
 }
 
+void MMIOHandler::SetGlobalPhysicalAccessWatch(
+    GlobalAccessWatchCallback callback, void* callback_context) {
+  auto lock = global_critical_region_.Acquire();
+  global_physical_watch_callback_ = callback;
+  global_physical_watch_callback_context_ = callback_context;
+}
+
+void MMIOHandler::ProtectPhysicalMemory(uint32_t physical_address,
+                                        uint32_t length, WatchType type) {
+  uint32_t base_address = physical_address & 0x1FFFFFFF;
+
+  // Can only protect sizes matching system page size.
+  // This means we need to round up, which will cause spurious access
+  // violations and invalidations.
+  // TODO(benvanik): only invalidate if actually within the region?
+  length =
+      xe::round_up(length + (base_address % uint32_t(xe::memory::page_size())),
+                   uint32_t(xe::memory::page_size()));
+  base_address = base_address - (base_address % xe::memory::page_size());
+
+  auto page_access = memory::PageAccess::kNoAccess;
+  switch (type) {
+    case kWatchInvalid:
+      page_access = memory::PageAccess::kReadWrite;
+      break;
+    case kWatchWrite:
+      page_access = memory::PageAccess::kReadOnly;
+      break;
+    case kWatchReadWrite:
+      page_access = memory::PageAccess::kNoAccess;
+      break;
+    default:
+      assert_unhandled_case(type);
+      break;
+  }
+
+  // Protect the range under all address spaces.
+  memory::Protect(physical_membase_ + base_address, length, page_access,
+                  nullptr);
+  memory::Protect(virtual_membase_ + 0xA0000000 + base_address, length,
+                  page_access, nullptr);
+  memory::Protect(virtual_membase_ + 0xC0000000 + base_address, length,
+                  page_access, nullptr);
+  memory::Protect(virtual_membase_ + 0xE0000000 + base_address, length,
+                  page_access, nullptr);
+}
+
+void MMIOHandler::UnprotectPhysicalMemory(uint32_t physical_address,
+                                          uint32_t length) {
+  ProtectPhysicalMemory(physical_address, length, kWatchInvalid);
+}
+
 void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) {
   auto lock = global_critical_region_.Acquire();
 
@@ -262,6 +314,13 @@ bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) {
 bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) {
   auto lock = global_critical_region_.Acquire();
 
+  if (global_physical_watch_callback_ != nullptr) {
+    if (global_physical_watch_callback_(global_physical_watch_callback_context_,
+                                        physical_address)) {
+      return true;
+    }
+  }
+
   bool hit = false;
   for (auto it = access_watches_.begin(); it != access_watches_.end();) {
     auto entry = *it;
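The rounding in ProtectPhysicalMemory above widens an arbitrary byte range to whole pages, which is why the comment warns about spurious access violations and invalidations. Here is a standalone sketch of the same math; the 4096-byte page size and the sample range are assumptions for illustration only (the real code queries xe::memory::page_size()):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed page size for illustration; the real code asks the OS.
  const uint32_t page_size = 4096;
  uint32_t address = 0x00001234;  // Sample physical address (assumption).
  uint32_t length = 0x2000;       // Sample range length (assumption).
  // Grow the length by the offset within the first page, then round up to a
  // whole number of pages - the same effect as the xe::round_up call above.
  uint32_t aligned_length =
      (length + address % page_size + page_size - 1) / page_size * page_size;
  uint32_t aligned_address = address - address % page_size;
  // Prints 00001234+00002000 -> 00001000+00003000: the protected span can
  // overshoot the requested one by up to a page on each side, hence the
  // spurious invalidations.
  std::printf("%.8X+%.8X -> %.8X+%.8X\n", address, length, aligned_address,
              aligned_length);
  return 0;
}
```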
diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h
index e61cd1c20..8a864a9a6 100644
--- a/src/xenia/cpu/mmio_handler.h
+++ b/src/xenia/cpu/mmio_handler.h
@@ -30,6 +30,7 @@ typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context,
                                   uint32_t addr, uint32_t value);
 typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr,
                                     uint32_t address);
+typedef bool (*GlobalAccessWatchCallback)(void* context_ptr, uint32_t address);
 
 struct MMIORange {
   uint32_t address;
@@ -74,6 +75,12 @@ class MMIOHandler {
                                 void* callback_context, void* callback_data);
   void CancelAccessWatch(uintptr_t watch_handle);
 
+  void SetGlobalPhysicalAccessWatch(GlobalAccessWatchCallback callback,
+                                    void* callback_context);
+  void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
+                             WatchType type);
+  void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length);
+
   // Fires and clears any access watches that overlap this range.
   void InvalidateRange(uint32_t physical_address, size_t length);
 
@@ -112,6 +119,8 @@ class MMIOHandler {
   xe::global_critical_region global_critical_region_;
   // TODO(benvanik): data structure magic.
   std::list<AccessWatchEntry*> access_watches_;
+  GlobalAccessWatchCallback global_physical_watch_callback_ = nullptr;
+  void* global_physical_watch_callback_context_;
 
   static MMIOHandler* global_handler_;
 };

diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index dddd780a9..43116e91a 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -67,6 +67,7 @@ void D3D12CommandProcessor::ShutdownContext() {
   context->AwaitAllFramesCompletion();
 
   pipeline_cache_.reset();
+  shared_memory_.reset();
 
   for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
     command_lists_[i].reset();
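A note on the GlobalAccessWatchCallback plumbing: the handler stores a plain function pointer plus an opaque context pointer, so clients register through a static thunk, which is exactly what SharedMemory does below with SetGlobalPhysicalAccessWatch(WatchCallbackThunk, this). A minimal self-contained sketch of that idiom; Watcher and OnAccess are illustrative names, not from the tree:

```cpp
#include <cstdint>
#include <cstdio>

// Plain C-style callback type, shaped like GlobalAccessWatchCallback.
typedef bool (*WatchFn)(void* context, uint32_t address);

struct Watcher {
  // Static thunk: recovers the object from the opaque context pointer and
  // forwards to the member function.
  static bool Thunk(void* context, uint32_t address) {
    return static_cast<Watcher*>(context)->OnAccess(address);
  }
  bool OnAccess(uint32_t address) {
    std::printf("access at %.8X\n", address);
    return true;  // Claim the access so lower-priority watches don't fire.
  }
};

int main() {
  Watcher watcher;
  // Registration passes (Thunk, &watcher); the handler later invokes
  // fn(context, address) without knowing the Watcher type.
  WatchFn fn = &Watcher::Thunk;
  void* context = &watcher;
  fn(context, 0x1FFF0000);
  return 0;
}
```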
diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index 8e28f343b..f89d6ca75 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -79,10 +79,14 @@ bool SharedMemory::Initialize() {
   upload_buffer_submitted_first_ = nullptr;
   upload_buffer_submitted_last_ = nullptr;
 
+  memory_->SetGlobalPhysicalAccessWatch(WatchCallbackThunk, this);
+
   return true;
 }
 
 void SharedMemory::Shutdown() {
+  memory_->SetGlobalPhysicalAccessWatch(nullptr, nullptr);
+
   while (upload_buffer_available_first_ != nullptr) {
     auto upload_buffer_next = upload_buffer_available_first_->next;
     upload_buffer_available_first_->buffer->Release();
@@ -111,6 +115,8 @@
 }
 
 void SharedMemory::BeginFrame() {
+  // XELOGGPU("SharedMemory: BeginFrame start");
+
   // Check triggered watches, clear them and mark modified pages as out of date.
   watch_mutex_.lock();
   for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
@@ -142,10 +148,14 @@
   }
 
   heap_creation_failed_ = false;
+
+  // XELOGGPU("SharedMemory: BeginFrame end");
 }
 
 bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
                             ID3D12GraphicsCommandList* command_list_draw) {
+  // XELOGGPU("SharedMemory: EndFrame start");
+
   // Before drawing starts, it's assumed that the buffer is a copy destination.
   // This transition is for the next frame, not for the current one.
   TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);
@@ -162,10 +172,10 @@
   uint32_t upload_range_start = 0, upload_range_length;
   while ((upload_range_start =
           NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
-    XELOGGPU(
+    /* XELOGGPU(
         "Shared memory: Uploading %.8X-%.8X range",
         upload_range_start << page_size_log2_,
-        ((upload_range_start + upload_range_length) << page_size_log2_) - 1);
+        ((upload_range_start + upload_range_length) << page_size_log2_) - 1); */
     while (upload_range_length > 0) {
       if (upload_buffer_mapping == nullptr) {
         // Create a completely new upload buffer if the available pool is empty.
@@ -215,7 +225,7 @@
           (upload_buffer_written << page_size_log2_),
           memory_->TranslatePhysical(upload_range_start << page_size_log2_),
           upload_write_length << page_size_log2_);
-      command_list_draw->CopyBufferRegion(
+      command_list_setup->CopyBufferRegion(
           buffer_, upload_range_start << page_size_log2_,
           upload_buffer_available_first_->buffer,
           upload_buffer_written << page_size_log2_,
@@ -265,6 +275,42 @@
     upload_buffer_submitted_last_ = upload_buffer;
   }
 
+  // Protect the uploaded ranges.
+  // TODO(Triang3l): Add L2 or store ranges in a list - this may hold the mutex
+  // for pretty long.
+  if (upload_end != 0) {
+    watch_mutex_.lock();
+    uint32_t protect_end = 0, protect_start, protect_length;
+    while ((protect_start = NextUploadRange(protect_end, protect_length)) !=
+           UINT_MAX) {
+      if (protect_start >= upload_end) {
+        break;
+      }
+      protect_length = std::min(protect_length, upload_end - protect_start);
+      uint32_t protect_last = protect_start + protect_length - 1;
+      uint32_t protect_block_first = protect_start >> 6;
+      uint32_t protect_block_last = protect_last >> 6;
+      for (uint32_t i = protect_block_first; i <= protect_block_last; ++i) {
+        uint64_t protect_bits = ~0ull;
+        if (i == protect_block_first) {
+          protect_bits &= ~((1ull << (protect_start & 63)) - 1);
+        }
+        if (i == protect_block_last && (protect_last & 63) != 63) {
+          protect_bits &= (1ull << ((protect_last & 63) + 1)) - 1;
+        }
+        watched_pages_[i] |= protect_bits;
+      }
+      memory_->ProtectPhysicalMemory(protect_start << page_size_log2_,
+                                     protect_length << page_size_log2_,
+                                     cpu::MMIOHandler::WatchType::kWatchWrite);
+      protect_end = protect_last + 1;
+      if (protect_end >= upload_end) {
+        break;
+      }
+    }
+    watch_mutex_.unlock();
+  }
+
   // Mark the newly uploaded ranges as uploaded.
   std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
   if (upload_end < page_count_) {
@@ -279,6 +325,8 @@
     }
   }
 
+  // XELOGGPU("SharedMemory: EndFrame end");
+
   return upload_end != 0;
 }
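The protect_bits masking in the EndFrame hunk above marks an inclusive page range inside 64-page bitmap blocks: the first block is trimmed from below, the last from above, and the (protect_last & 63) != 63 check deliberately avoids the undefined full-width shift 1ull << 64. A runnable sketch of the same masking; the sample page range is an assumption for the demo:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // One uint64_t covers 64 pages, as in watched_pages_.
  std::vector<uint64_t> bitmap(4, 0);
  uint32_t first_page = 70, last_page = 130;  // Sample inclusive range.
  uint32_t first_block = first_page >> 6, last_block = last_page >> 6;
  for (uint32_t i = first_block; i <= last_block; ++i) {
    uint64_t bits = ~0ull;
    if (i == first_block) {
      // Clear the bits below the range start within the first block.
      bits &= ~((1ull << (first_page & 63)) - 1);
    }
    if (i == last_block && (last_page & 63) != 63) {
      // Keep only bits up to and including the range end; skipping the
      // bit-63 case avoids the undefined shift by 64.
      bits &= (1ull << ((last_page & 63) + 1)) - 1;
    }
    bitmap[i] |= bits;
  }
  // Prints FFFFFFFFFFFFFFC0 for block 1 (pages 70..127) and
  // 0000000000000007 for block 2 (pages 128..130).
  for (size_t i = 0; i < bitmap.size(); ++i) {
    std::printf("block %zu: %016llX\n", i,
                static_cast<unsigned long long>(bitmap[i]));
  }
  return 0;
}
```

Both trims apply to the same block when the range fits entirely within one 64-page group, which is why the two conditions are independent rather than mutually exclusive.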
@@ -325,7 +373,7 @@
     return false;
   }
   uint32_t last = start + length - 1;
-  XELOGGPU("Shared memory: Range %.8X-%.8X is being used", start, last);
+  // XELOGGPU("Shared memory: Range %.8X-%.8X is being used", start, last);
 
   // Ensure all tile heaps are present.
   uint32_t heap_first = start >> kHeapSizeLog2;
@@ -339,9 +387,9 @@
       // current frame anymore if it has failed at least once.
       return false;
     }
-    XELOGGPU("Shared memory: Creating %.8X-%.8X tile heap",
+    /* XELOGGPU("Shared memory: Creating %.8X-%.8X tile heap",
              heap_first << kHeapSizeLog2,
-             (heap_last << kHeapSizeLog2) + (kHeapSize - 1));
+             (heap_last << kHeapSizeLog2) + (kHeapSize - 1)); */
     auto provider = context_->GetD3D12Provider();
     auto device = provider->GetDevice();
     auto direct_queue = provider->GetDirectQueue();
@@ -379,6 +427,8 @@
   // Mark the outdated tiles in this range as requiring upload, and also make
   // them up-to-date so textures aren't invalidated every use.
   // TODO(Triang3l): Invalidate textures referencing outdated pages.
+  // It's safe to invalidate textures here because only actually used ranges
+  // will be uploaded and marked as in-sync at the end of the frame.
   uint32_t page_first_index = start >> page_size_log2_;
   uint32_t page_last_index = last >> page_size_log2_;
   uint32_t block_first_index = page_first_index >> 6;
@@ -398,6 +448,39 @@
   return true;
 }
 
+bool SharedMemory::WatchCallbackThunk(void* context_ptr, uint32_t address) {
+  return reinterpret_cast<SharedMemory*>(context_ptr)->WatchCallback(address);
+}
+
+bool SharedMemory::WatchCallback(uint32_t address) {
+  address &= 0x1FFFFFFF;
+  uint32_t page_index_l1_global = address >> page_size_log2_;
+  uint32_t block_index_l1 = page_index_l1_global >> 6;
+  uint64_t page_bit_l1 = 1ull << (page_index_l1_global & 63);
+
+  std::lock_guard<std::mutex> lock(watch_mutex_);
+  if (!(watched_pages_[block_index_l1] & page_bit_l1)) {
+    return false;
+  }
+  // XELOGGPU("Shared memory: Watch triggered for %.8X", address);
+
+  // Mark the page as modified.
+  uint32_t block_index_l2 = block_index_l1 >> 6;
+  uint64_t page_bit_l2 = 1ull << (block_index_l1 & 63);
+  if (!(watches_triggered_l2_[block_index_l2] & page_bit_l2)) {
+    watches_triggered_l2_[block_index_l2] |= page_bit_l2;
+    // L1 is not cleared in BeginFrame, so clear it now.
+    watches_triggered_l1_[block_index_l1] = 0;
+  }
+  watches_triggered_l1_[block_index_l1] |= page_bit_l1;
+
+  // Unprotect the page.
+  memory_->UnprotectPhysicalMemory(page_index_l1_global << page_size_log2_,
+                                   1 << page_size_log2_);
+  watched_pages_[block_index_l1] &= ~page_bit_l1;
+  return true;
+}
+
 void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
                                     ID3D12GraphicsCommandList* command_list) {
   if (buffer_state_ == new_state) {

diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index b37e6aa5c..3eec30634 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -94,6 +94,9 @@ class SharedMemory {
   // cleared!
   std::vector<uint64_t> watches_triggered_l1_;
   std::vector<uint64_t> watches_triggered_l2_;
+  // Memory access callback.
+  static bool WatchCallbackThunk(void* context_ptr, uint32_t address);
+  bool WatchCallback(uint32_t address);
 
   // Pages that need to be uploaded in this frame (that are used but modified).
   std::vector<uint64_t> upload_pages_;
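WatchCallback above maintains two bitmap levels: each bit of watches_triggered_l2_ summarizes one 64-bit block of watches_triggered_l1_, and an L1 block is zeroed lazily the first time its L2 bit is set, so BeginFrame only visits blocks that actually triggered and never wipes the full L1 array. A self-contained model of that scheme; the class, method names and sample page numbers are illustrative only:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Two-level triggered-watch bitmap: one l2 bit per 64-entry l1 block, with
// lazy per-frame clearing of l1, mirroring watches_triggered_l1_/_l2_.
struct TriggeredWatches {
  std::vector<uint64_t> l1, l2;
  explicit TriggeredWatches(size_t page_count)
      : l1((page_count + 63) / 64), l2((l1.size() + 63) / 64) {}

  void Trigger(uint32_t page) {
    uint32_t block = page >> 6;
    uint64_t l2_bit = 1ull << (block & 63);
    if (!(l2[block >> 6] & l2_bit)) {
      l2[block >> 6] |= l2_bit;
      // The l1 block may hold stale bits from an earlier frame - clear it
      // on first use instead of wiping the whole array every frame.
      l1[block] = 0;
    }
    l1[block] |= 1ull << (page & 63);
  }

  // Frame start: visit only the blocks l2 marks dirty, then reset l2.
  void Drain() {
    for (size_t i = 0; i < l2.size(); ++i) {
      for (uint32_t j = 0; j < 64; ++j) {
        if (l2[i] & (1ull << j)) {
          std::printf("dirty l1 block %zu: %016llX\n", i * 64 + j,
                      static_cast<unsigned long long>(l1[i * 64 + j]));
        }
      }
      l2[i] = 0;
    }
  }
};

int main() {
  TriggeredWatches watches(128 * 1024);  // E.g. 512 MiB of 4 KiB pages.
  watches.Trigger(5);   // Sample pages - assumptions for the demo.
  watches.Trigger(70);
  watches.Drain();
  return 0;
}
```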
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index cb2553fe1..e4ab8611b 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -412,6 +412,21 @@ void Memory::CancelAccessWatch(uintptr_t watch_handle) {
   mmio_handler_->CancelAccessWatch(watch_handle);
 }
 
+void Memory::SetGlobalPhysicalAccessWatch(
+    cpu::GlobalAccessWatchCallback callback, void* callback_context) {
+  mmio_handler_->SetGlobalPhysicalAccessWatch(callback, callback_context);
+}
+
+void Memory::ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
+                                   cpu::MMIOHandler::WatchType type) {
+  mmio_handler_->ProtectPhysicalMemory(physical_address, length, type);
+}
+
+void Memory::UnprotectPhysicalMemory(uint32_t physical_address,
+                                     uint32_t length) {
+  mmio_handler_->UnprotectPhysicalMemory(physical_address, length);
+}
+
 uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
                                  uint32_t system_heap_flags) {
   // TODO(benvanik): lightweight pool.

diff --git a/src/xenia/memory.h b/src/xenia/memory.h
index 4309ded05..87834280f 100644
--- a/src/xenia/memory.h
+++ b/src/xenia/memory.h
@@ -319,6 +319,22 @@ class Memory {
   // Cancels a write watch requested with AddPhysicalAccessWatch.
   void CancelAccessWatch(uintptr_t watch_handle);
 
+  // Sets the default access watch callback for physical memory, which has a
+  // higher priority than watches - if it returns true, watches won't be
+  // triggered.
+  void SetGlobalPhysicalAccessWatch(cpu::GlobalAccessWatchCallback callback,
+                                    void* callback_context);
+
+  // Protects a physical memory range without adding a watch, primarily for use
+  // with the global physical access watch.
+  void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
+                             cpu::MMIOHandler::WatchType type);
+
+  // Unprotects a physical memory range previously protected using
+  // ProtectPhysicalMemory, primarily for use with the global physical access
+  // watch.
+  void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length);
+
   // Allocates virtual memory from the 'system' heap.
   // System memory is kept separate from game memory but is still accessible
   // using normal guest virtual addresses. Kernel structures and other internal
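For reference, the dispatch priority documented in memory.h above can be modeled in isolation: in CheckAccessWatch the global callback runs first, and returning true suppresses the per-range watches entirely. A small sketch of that ordering; the Handler and RangeWatch types here are simplified stand-ins, not the real MMIOHandler:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Model of the dispatch order in MMIOHandler::CheckAccessWatch: the global
// callback is consulted first and can claim the access outright.
typedef bool (*GlobalWatch)(void* context, uint32_t address);

struct RangeWatch {
  uint32_t start, length;
};

struct Handler {
  GlobalWatch global = nullptr;
  void* global_context = nullptr;
  std::vector<RangeWatch> watches;

  bool CheckAccess(uint32_t address) {
    if (global && global(global_context, address)) {
      return true;  // Claimed globally; range watches never see it.
    }
    bool hit = false;
    for (const RangeWatch& w : watches) {
      if (address >= w.start && address < w.start + w.length) {
        std::printf("range watch hit at %.8X\n", address);
        hit = true;
      }
    }
    return hit;
  }
};

static bool GlobalClaimsEvenPages(void* /*context*/, uint32_t address) {
  return ((address >> 12) & 1) == 0;  // Arbitrary demo predicate.
}

int main() {
  Handler handler;
  handler.global = GlobalClaimsEvenPages;
  handler.watches.push_back({0x0000, 0x4000});
  handler.CheckAccess(0x1800);  // Odd page: falls through to range watches.
  handler.CheckAccess(0x2800);  // Even page: claimed by the global callback.
  return 0;
}
```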