[D3D12] SHM: Watches prototype, some uploading

This commit is contained in:
Triang3l 2018-07-26 22:52:26 +03:00
parent 812a35caff
commit 4f7edff19d
7 changed files with 192 additions and 6 deletions

View File

@ -214,6 +214,58 @@ void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) {
delete entry; delete entry;
} }
// Installs the global physical memory access callback together with the
// opaque context pointer that is handed back to it. CheckAccessWatch skips the
// callback while it is nullptr, so passing nullptr removes it.
void MMIOHandler::SetGlobalPhysicalAccessWatch(
    GlobalAccessWatchCallback callback, void* callback_context) {
  // CheckAccessWatch reads both fields while holding the same critical
  // region, so it always observes a matching callback/context pair.
  auto lock = global_critical_region_.Acquire();
  global_physical_watch_callback_context_ = callback_context;
  global_physical_watch_callback_ = callback;
}
// Applies the host page protection implied by `type` to a guest physical
// memory range, in the physical mapping and in the 0xA0000000, 0xC0000000 and
// 0xE0000000 virtual windows.
//
// physical_address: start of the range; only the low 29 bits are used.
// length: size of the range in bytes.
// type: kWatchInvalid makes the pages read/write, kWatchWrite makes them
//     read-only and kWatchReadWrite removes all access.
void MMIOHandler::ProtectPhysicalMemory(uint32_t physical_address,
                                        uint32_t length, WatchType type) {
  const uint32_t host_page_size = uint32_t(xe::memory::page_size());

  // Can only protect sizes matching system page size, so the range is widened
  // to whole pages: the start is moved down to a page boundary and the length
  // grows by the same amount before being rounded up. This will cause
  // spurious access violations and invalidations.
  // TODO(benvanik): only invalidate if actually within the region?
  uint32_t base_address = physical_address & 0x1FFFFFFF;
  const uint32_t misalignment = base_address % host_page_size;
  length = xe::round_up(length + misalignment, host_page_size);
  base_address -= misalignment;

  // No access is both the kWatchReadWrite protection and the value left in
  // place when an unhandled type trips the assertion.
  auto page_access = memory::PageAccess::kNoAccess;
  switch (type) {
    case kWatchReadWrite:
      break;
    case kWatchWrite:
      page_access = memory::PageAccess::kReadOnly;
      break;
    case kWatchInvalid:
      page_access = memory::PageAccess::kReadWrite;
      break;
    default:
      assert_unhandled_case(type);
      break;
  }

  // Protect the range under all address spaces: the physical mapping first,
  // then each virtual window in ascending order.
  memory::Protect(physical_membase_ + base_address, length, page_access,
                  nullptr);
  const uint32_t virtual_windows[] = {0xA0000000, 0xC0000000, 0xE0000000};
  for (const uint32_t window : virtual_windows) {
    memory::Protect(virtual_membase_ + window + base_address, length,
                    page_access, nullptr);
  }
}
void MMIOHandler::UnprotectPhysicalMemory(uint32_t physical_address,
uint32_t length) {
ProtectPhysicalMemory(physical_address, length, kWatchInvalid);
}
void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) {
auto lock = global_critical_region_.Acquire(); auto lock = global_critical_region_.Acquire();
@ -262,6 +314,13 @@ bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) {
bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) { bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) {
auto lock = global_critical_region_.Acquire(); auto lock = global_critical_region_.Acquire();
if (global_physical_watch_callback_ != nullptr) {
if (global_physical_watch_callback_(global_physical_watch_callback_context_,
physical_address)) {
return true;
}
}
bool hit = false; bool hit = false;
for (auto it = access_watches_.begin(); it != access_watches_.end();) { for (auto it = access_watches_.begin(); it != access_watches_.end();) {
auto entry = *it; auto entry = *it;

View File

@ -30,6 +30,7 @@ typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context,
uint32_t addr, uint32_t value); uint32_t addr, uint32_t value);
typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr, typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr,
uint32_t address); uint32_t address);
// Callback for the global physical access watch. Receives the context pointer
// registered with it and the physical address passed to CheckAccessWatch.
// Returning true makes CheckAccessWatch return true immediately, without
// walking the regular access watches.
typedef bool (*GlobalAccessWatchCallback)(void* context_ptr, uint32_t address);
struct MMIORange { struct MMIORange {
uint32_t address; uint32_t address;
@ -74,6 +75,12 @@ class MMIOHandler {
void* callback_context, void* callback_data); void* callback_context, void* callback_data);
void CancelAccessWatch(uintptr_t watch_handle); void CancelAccessWatch(uintptr_t watch_handle);
// Sets the callback that CheckAccessWatch invokes before the regular access
// watches, along with the context pointer passed back to it. A nullptr
// callback disables the global watch.
void SetGlobalPhysicalAccessWatch(GlobalAccessWatchCallback callback,
void* callback_context);
// Applies the page protection implied by `type` to a physical memory range
// (widened to whole system pages) without registering an access watch entry.
void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
WatchType type);
// Restores read/write access to a physical memory range.
void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length);
// Fires and clears any access watches that overlap this range. // Fires and clears any access watches that overlap this range.
void InvalidateRange(uint32_t physical_address, size_t length); void InvalidateRange(uint32_t physical_address, size_t length);
@ -112,6 +119,8 @@ class MMIOHandler {
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
// TODO(benvanik): data structure magic. // TODO(benvanik): data structure magic.
std::list<AccessWatchEntry*> access_watches_; std::list<AccessWatchEntry*> access_watches_;
// Global physical access watch, consulted by CheckAccessWatch before the
// regular access watches. Both fields are written together by
// SetGlobalPhysicalAccessWatch under global_critical_region_. The context is
// given an initializer like the callback so it never holds an indeterminate
// value before the first registration.
GlobalAccessWatchCallback global_physical_watch_callback_ = nullptr;
void* global_physical_watch_callback_context_ = nullptr;
static MMIOHandler* global_handler_; static MMIOHandler* global_handler_;
}; };

View File

@ -67,6 +67,7 @@ void D3D12CommandProcessor::ShutdownContext() {
context->AwaitAllFramesCompletion(); context->AwaitAllFramesCompletion();
pipeline_cache_.reset(); pipeline_cache_.reset();
shared_memory_.reset();
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
command_lists_[i].reset(); command_lists_[i].reset();

View File

@ -79,10 +79,14 @@ bool SharedMemory::Initialize() {
upload_buffer_submitted_first_ = nullptr; upload_buffer_submitted_first_ = nullptr;
upload_buffer_submitted_last_ = nullptr; upload_buffer_submitted_last_ = nullptr;
memory_->SetGlobalPhysicalAccessWatch(WatchCallbackThunk, this);
return true; return true;
} }
void SharedMemory::Shutdown() { void SharedMemory::Shutdown() {
memory_->SetGlobalPhysicalAccessWatch(nullptr, nullptr);
while (upload_buffer_available_first_ != nullptr) { while (upload_buffer_available_first_ != nullptr) {
auto upload_buffer_next = upload_buffer_available_first_->next; auto upload_buffer_next = upload_buffer_available_first_->next;
upload_buffer_available_first_->buffer->Release(); upload_buffer_available_first_->buffer->Release();
@ -111,6 +115,8 @@ void SharedMemory::Shutdown() {
} }
void SharedMemory::BeginFrame() { void SharedMemory::BeginFrame() {
// XELOGGPU("SharedMemory: BeginFrame start");
// Check triggered watches, clear them and mark modified pages as out of date. // Check triggered watches, clear them and mark modified pages as out of date.
watch_mutex_.lock(); watch_mutex_.lock();
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) { for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
@ -142,10 +148,14 @@ void SharedMemory::BeginFrame() {
} }
heap_creation_failed_ = false; heap_creation_failed_ = false;
// XELOGGPU("SharedMemory: BeginFrame end");
} }
bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup, bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
ID3D12GraphicsCommandList* command_list_draw) { ID3D12GraphicsCommandList* command_list_draw) {
// XELOGGPU("SharedMemory: EndFrame start");
// Before drawing starts, it's assumed that the buffer is a copy destination. // Before drawing starts, it's assumed that the buffer is a copy destination.
// This transition is for the next frame, not for the current one. // This transition is for the next frame, not for the current one.
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw); TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);
@ -162,10 +172,10 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
uint32_t upload_range_start = 0, upload_range_length; uint32_t upload_range_start = 0, upload_range_length;
while ((upload_range_start = while ((upload_range_start =
NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) { NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
XELOGGPU( /* XELOGGPU(
"Shared memory: Uploading %.8X-%.8X range", "Shared memory: Uploading %.8X-%.8X range",
upload_range_start << page_size_log2_, upload_range_start << page_size_log2_,
((upload_range_start + upload_range_length) << page_size_log2_) - 1); ((upload_range_start + upload_range_length) << page_size_log2_) - 1); */
while (upload_range_length > 0) { while (upload_range_length > 0) {
if (upload_buffer_mapping == nullptr) { if (upload_buffer_mapping == nullptr) {
// Create a completely new upload buffer if the available pool is empty. // Create a completely new upload buffer if the available pool is empty.
@ -215,7 +225,7 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
(upload_buffer_written << page_size_log2_), (upload_buffer_written << page_size_log2_),
memory_->TranslatePhysical(upload_range_start << page_size_log2_), memory_->TranslatePhysical(upload_range_start << page_size_log2_),
upload_write_length << page_size_log2_); upload_write_length << page_size_log2_);
command_list_draw->CopyBufferRegion( command_list_setup->CopyBufferRegion(
buffer_, upload_range_start << page_size_log2_, buffer_, upload_range_start << page_size_log2_,
upload_buffer_available_first_->buffer, upload_buffer_available_first_->buffer,
upload_buffer_written << page_size_log2_, upload_buffer_written << page_size_log2_,
@ -265,6 +275,42 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
upload_buffer_submitted_last_ = upload_buffer; upload_buffer_submitted_last_ = upload_buffer;
} }
// Protect the uploaded ranges.
// TODO(Triang3l): Add L2 or store ranges in a list - this may hold the mutex
// for pretty long.
if (upload_end != 0) {
watch_mutex_.lock();
uint32_t protect_end = 0, protect_start, protect_length;
while ((protect_start = NextUploadRange(protect_end, protect_length)) !=
UINT_MAX) {
if (protect_start >= upload_end) {
break;
}
protect_length = std::min(protect_length, upload_end - protect_start);
uint32_t protect_last = protect_start + protect_length - 1;
uint32_t protect_block_first = protect_start >> 6;
uint32_t protect_block_last = protect_last >> 6;
for (uint32_t i = protect_block_first; i <= protect_block_last; ++i) {
uint64_t protect_bits = ~0ull;
if (i == protect_block_first) {
protect_bits &= ~((1ull << (protect_start & 63)) - 1);
}
if (i == protect_block_last && (protect_last & 63) != 63) {
protect_bits &= (1ull << ((protect_last & 63) + 1)) - 1;
}
watched_pages_[i] |= protect_bits;
}
memory_->ProtectPhysicalMemory(protect_start << page_size_log2_,
protect_length << page_size_log2_,
cpu::MMIOHandler::WatchType::kWatchWrite);
protect_end = protect_last + 1;
if (protect_end >= upload_end) {
break;
}
}
watch_mutex_.unlock();
}
// Mark the newly uploaded ranges as uploaded. // Mark the newly uploaded ranges as uploaded.
std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t)); std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
if (upload_end < page_count_) { if (upload_end < page_count_) {
@ -279,6 +325,8 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
} }
} }
// XELOGGPU("SharedMemory: EndFrame end");
return upload_end != 0; return upload_end != 0;
} }
@ -325,7 +373,7 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
return false; return false;
} }
uint32_t last = start + length - 1; uint32_t last = start + length - 1;
XELOGGPU("Shared memory: Range %.8X-%.8X is being used", start, last); // XELOGGPU("Shared memory: Range %.8X-%.8X is being used", start, last);
// Ensure all tile heaps are present. // Ensure all tile heaps are present.
uint32_t heap_first = start >> kHeapSizeLog2; uint32_t heap_first = start >> kHeapSizeLog2;
@ -339,9 +387,9 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
// current frame anymore if have failed at least once. // current frame anymore if have failed at least once.
return false; return false;
} }
XELOGGPU("Shared memory: Creating %.8X-%.8X tile heap", /* XELOGGPU("Shared memory: Creating %.8X-%.8X tile heap",
heap_first << kHeapSizeLog2, heap_first << kHeapSizeLog2,
(heap_last << kHeapSizeLog2) + (kHeapSize - 1)); (heap_last << kHeapSizeLog2) + (kHeapSize - 1)); */
auto provider = context_->GetD3D12Provider(); auto provider = context_->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
auto direct_queue = provider->GetDirectQueue(); auto direct_queue = provider->GetDirectQueue();
@ -379,6 +427,8 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
// Mark the outdated tiles in this range as requiring upload, and also make // Mark the outdated tiles in this range as requiring upload, and also make
// them up-to-date so textures aren't invalidated every use. // them up-to-date so textures aren't invalidated every use.
// TODO(Triang3l): Invalidate textures referencing outdated pages. // TODO(Triang3l): Invalidate textures referencing outdated pages.
// It is safe to invalidate textures here because only the ranges that are
// actually used will be uploaded and marked as in-sync at the end of the frame.
uint32_t page_first_index = start >> page_size_log2_; uint32_t page_first_index = start >> page_size_log2_;
uint32_t page_last_index = last >> page_size_log2_; uint32_t page_last_index = last >> page_size_log2_;
uint32_t block_first_index = page_first_index >> 6; uint32_t block_first_index = page_first_index >> 6;
@ -398,6 +448,39 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
return true; return true;
} }
// Static entry point registered with SetGlobalPhysicalAccessWatch. The context
// pointer is the SharedMemory instance supplied at registration, so the call
// is forwarded to its WatchCallback member.
bool SharedMemory::WatchCallbackThunk(void* context_ptr, uint32_t address) {
  auto* shared_memory = static_cast<SharedMemory*>(context_ptr);
  return shared_memory->WatchCallback(address);
}
// Handles an access to physical memory reported through the global access
// watch. Returns false when the page containing `address` is not currently
// watched by the shared memory. Otherwise records the page as triggered,
// unprotects it, stops watching it and returns true.
bool SharedMemory::WatchCallback(uint32_t address) {
  // Only the low 29 bits of the address are used to locate the page.
  const uint32_t physical_address = address & 0x1FFFFFFF;
  const uint32_t page_index = physical_address >> page_size_log2_;
  // One L1 block is a 64-bit word holding one bit per page.
  const uint32_t l1_block_index = page_index >> 6;
  const uint64_t l1_page_bit = 1ull << (page_index & 63);

  std::lock_guard<std::mutex> lock(watch_mutex_);

  auto& watched_block = watched_pages_[l1_block_index];
  if ((watched_block & l1_page_bit) == 0) {
    return false;
  }

  // XELOGGPU("Shared memory: Watch triggered for %.8X", address);

  // Mark the page as modified. Each L2 bit flags one L1 block as containing
  // triggered pages.
  const uint32_t l2_block_index = l1_block_index >> 6;
  const uint64_t l2_block_bit = 1ull << (l1_block_index & 63);
  auto& triggered_l2_block = watches_triggered_l2_[l2_block_index];
  auto& triggered_l1_block = watches_triggered_l1_[l1_block_index];
  if ((triggered_l2_block & l2_block_bit) == 0) {
    triggered_l2_block |= l2_block_bit;
    // L1 is not cleared in BeginFrame, so clear it now.
    triggered_l1_block = 0;
  }
  triggered_l1_block |= l1_page_bit;

  // Unprotect the page, then stop watching it.
  memory_->UnprotectPhysicalMemory(page_index << page_size_log2_,
                                   1 << page_size_log2_);
  watched_block &= ~l1_page_bit;
  return true;
}
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state, void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
ID3D12GraphicsCommandList* command_list) { ID3D12GraphicsCommandList* command_list) {
if (buffer_state_ == new_state) { if (buffer_state_ == new_state) {

View File

@ -94,6 +94,9 @@ class SharedMemory {
// cleared! // cleared!
std::vector<uint64_t> watches_triggered_l1_; std::vector<uint64_t> watches_triggered_l1_;
std::vector<uint64_t> watches_triggered_l2_; std::vector<uint64_t> watches_triggered_l2_;
// Memory access callback.
static bool WatchCallbackThunk(void* context_ptr, uint32_t address);
bool WatchCallback(uint32_t address);
// Pages that need to be uploaded in this frame (that are used but modified). // Pages that need to be uploaded in this frame (that are used but modified).
std::vector<uint64_t> upload_pages_; std::vector<uint64_t> upload_pages_;

View File

@ -412,6 +412,21 @@ void Memory::CancelAccessWatch(uintptr_t watch_handle) {
mmio_handler_->CancelAccessWatch(watch_handle); mmio_handler_->CancelAccessWatch(watch_handle);
} }
// Registers the global physical access watch callback and its context pointer
// by forwarding both, unchanged, to the MMIO handler.
void Memory::SetGlobalPhysicalAccessWatch(
    cpu::GlobalAccessWatchCallback callback, void* callback_context) {
  mmio_handler_->SetGlobalPhysicalAccessWatch(callback, callback_context);
}
// Protects a physical memory range with the given watch type by forwarding
// the request, unchanged, to the MMIO handler.
void Memory::ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
                                   cpu::MMIOHandler::WatchType type) {
  mmio_handler_->ProtectPhysicalMemory(physical_address, length, type);
}
// Removes the protection from a physical memory range by forwarding the
// request, unchanged, to the MMIO handler.
void Memory::UnprotectPhysicalMemory(uint32_t physical_address,
                                     uint32_t length) {
  mmio_handler_->UnprotectPhysicalMemory(physical_address, length);
}
uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
uint32_t system_heap_flags) { uint32_t system_heap_flags) {
// TODO(benvanik): lightweight pool. // TODO(benvanik): lightweight pool.

View File

@ -319,6 +319,22 @@ class Memory {
// Cancels a write watch requested with AddPhysicalAccessWatch. // Cancels a write watch requested with AddPhysicalAccessWatch.
void CancelAccessWatch(uintptr_t watch_handle); void CancelAccessWatch(uintptr_t watch_handle);
// Sets the default access watch callback for physical memory, which has a
// higher priority than watches - if it returns true, watches won't be
// triggered.
void SetGlobalPhysicalAccessWatch(cpu::GlobalAccessWatchCallback callback,
void* callback_context);
// Protects a physical memory range without adding a watch, primarily for use
// with the global physical access watch.
void ProtectPhysicalMemory(uint32_t physical_address, uint32_t length,
cpu::MMIOHandler::WatchType type);
// Unprotects a physical memory range previously protected using
// ProtectPhysicalMemory, primarily for use with the global physical access
// watch.
void UnprotectPhysicalMemory(uint32_t physical_address, uint32_t length);
// Allocates virtual memory from the 'system' heap. // Allocates virtual memory from the 'system' heap.
// System memory is kept separate from game memory but is still accessible // System memory is kept separate from game memory but is still accessible
// using normal guest virtual addresses. Kernel structures and other internal // using normal guest virtual addresses. Kernel structures and other internal