[Memory, D3D12] Various refactoring from data provider development
This commit is contained in:
parent
b59ae30ec3
commit
8ec813de82
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/byte_order.h"
|
#include "xenia/base/byte_order.h"
|
||||||
|
@ -281,6 +282,14 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
|
||||||
if (ex->code() != Exception::Code::kAccessViolation) {
|
if (ex->code() != Exception::Code::kAccessViolation) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Exception::AccessViolationOperation operation =
|
||||||
|
ex->access_violation_operation();
|
||||||
|
if (operation != Exception::AccessViolationOperation::kRead &&
|
||||||
|
operation != Exception::AccessViolationOperation::kWrite) {
|
||||||
|
// Data Execution Prevention or something else uninteresting.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool is_write = operation == Exception::AccessViolationOperation::kWrite;
|
||||||
if (ex->fault_address() < uint64_t(virtual_membase_) ||
|
if (ex->fault_address() < uint64_t(virtual_membase_) ||
|
||||||
ex->fault_address() > uint64_t(memory_end_)) {
|
ex->fault_address() > uint64_t(memory_end_)) {
|
||||||
// Quick kill anything outside our mapping.
|
// Quick kill anything outside our mapping.
|
||||||
|
@ -304,32 +313,23 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
|
||||||
}
|
}
|
||||||
if (!range) {
|
if (!range) {
|
||||||
// Recheck if the pages are still protected (race condition - another thread
|
// Recheck if the pages are still protected (race condition - another thread
|
||||||
// clears the writewatch we just hit).
|
// clears the watch we just hit).
|
||||||
// Do this under the lock so we don't introduce another race condition.
|
// Do this under the lock so we don't introduce another race condition.
|
||||||
auto lock = global_critical_region_.Acquire();
|
auto lock = global_critical_region_.Acquire();
|
||||||
memory::PageAccess cur_access;
|
memory::PageAccess cur_access;
|
||||||
size_t page_length = memory::page_size();
|
size_t page_length = memory::page_size();
|
||||||
memory::QueryProtect(fault_host_address, page_length, cur_access);
|
memory::QueryProtect(fault_host_address, page_length, cur_access);
|
||||||
if (cur_access != memory::PageAccess::kReadOnly &&
|
if (cur_access != memory::PageAccess::kNoAccess &&
|
||||||
cur_access != memory::PageAccess::kNoAccess) {
|
(!is_write || cur_access != memory::PageAccess::kReadOnly)) {
|
||||||
// Another thread has cleared this write watch. Abort.
|
// Another thread has cleared this watch. Abort.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The address is not found within any range, so either a write watch or an
|
// The address is not found within any range, so either a write watch or an
|
||||||
// actual access violation.
|
// actual access violation.
|
||||||
if (access_violation_callback_) {
|
if (access_violation_callback_) {
|
||||||
switch (ex->access_violation_operation()) {
|
return access_violation_callback_(std::move(lock),
|
||||||
case Exception::AccessViolationOperation::kRead:
|
access_violation_callback_context_,
|
||||||
return access_violation_callback_(access_violation_callback_context_,
|
fault_host_address, is_write);
|
||||||
fault_host_address, false);
|
|
||||||
case Exception::AccessViolationOperation::kWrite:
|
|
||||||
return access_violation_callback_(access_violation_callback_context_,
|
|
||||||
fault_host_address, true);
|
|
||||||
default:
|
|
||||||
// Data Execution Prevention or something else uninteresting.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#define XENIA_CPU_MMIO_HANDLER_H_
|
#define XENIA_CPU_MMIO_HANDLER_H_
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "xenia/base/mutex.h"
|
#include "xenia/base/mutex.h"
|
||||||
|
@ -44,12 +45,13 @@ class MMIOHandler {
|
||||||
|
|
||||||
typedef uint32_t (*HostToGuestVirtual)(const void* context,
|
typedef uint32_t (*HostToGuestVirtual)(const void* context,
|
||||||
const void* host_address);
|
const void* host_address);
|
||||||
typedef bool (*AccessViolationCallback)(void* context, void* host_address,
|
typedef bool (*AccessViolationCallback)(
|
||||||
bool is_write);
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
|
void* context, void* host_address, bool is_write);
|
||||||
|
|
||||||
// access_violation_callback is called in global_critical_region, so if
|
// access_violation_callback is called with global_critical_region locked once
|
||||||
// multiple threads trigger an access violation in the same page, the callback
|
// on the thread, so if multiple threads trigger an access violation in the
|
||||||
// will be called only once.
|
// same page, the callback will be called only once.
|
||||||
static std::unique_ptr<MMIOHandler> Install(
|
static std::unique_ptr<MMIOHandler> Install(
|
||||||
uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end,
|
uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end,
|
||||||
HostToGuestVirtual host_to_guest_virtual,
|
HostToGuestVirtual host_to_guest_virtual,
|
||||||
|
|
|
@ -87,8 +87,8 @@ void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
|
||||||
|
|
||||||
void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
uint32_t length) {
|
uint32_t length) {
|
||||||
shared_memory_->MemoryWriteCallback(base_ptr, length, true);
|
shared_memory_->MemoryInvalidationCallback(base_ptr, length, true);
|
||||||
primitive_converter_->MemoryWriteCallback(base_ptr, length, true);
|
primitive_converter_->MemoryInvalidationCallback(base_ptr, length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
|
void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
|
||||||
|
@ -866,6 +866,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
if (FAILED(gamma_ramp_upload_->Map(
|
if (FAILED(gamma_ramp_upload_->Map(
|
||||||
0, nullptr, reinterpret_cast<void**>(&gamma_ramp_upload_mapping_)))) {
|
0, nullptr, reinterpret_cast<void**>(&gamma_ramp_upload_mapping_)))) {
|
||||||
XELOGE("Failed to map the gamma ramp upload buffer");
|
XELOGE("Failed to map the gamma ramp upload buffer");
|
||||||
|
gamma_ramp_upload_mapping_ = nullptr;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1827,42 +1828,24 @@ bool D3D12CommandProcessor::IssueCopy() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
assert_true(await_submission <= submission_current_);
|
||||||
SCOPE_profile_cpu_f("gpu");
|
if (await_submission == submission_current_) {
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
assert_true(submission_open_);
|
||||||
|
EndSubmission(false);
|
||||||
bool is_opening_frame = is_guest_command && !frame_open_;
|
|
||||||
if (submission_open_ && !is_opening_frame) {
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the fence - needed for all kinds of submissions (to reclaim transient
|
uint64_t submission_completed_before = submission_completed_;
|
||||||
// resources early) and specifically for frames (not to queue too many).
|
|
||||||
submission_completed_ = submission_fence_->GetCompletedValue();
|
submission_completed_ = submission_fence_->GetCompletedValue();
|
||||||
if (is_opening_frame) {
|
if (submission_completed_ < await_submission) {
|
||||||
// Await the availability of the current frame.
|
submission_fence_->SetEventOnCompletion(await_submission,
|
||||||
uint64_t frame_current_last_submission =
|
submission_fence_completion_event_);
|
||||||
closed_frame_submissions_[frame_current_ % kQueueFrames];
|
|
||||||
if (frame_current_last_submission > submission_completed_) {
|
|
||||||
submission_fence_->SetEventOnCompletion(
|
|
||||||
frame_current_last_submission, submission_fence_completion_event_);
|
|
||||||
WaitForSingleObject(submission_fence_completion_event_, INFINITE);
|
WaitForSingleObject(submission_fence_completion_event_, INFINITE);
|
||||||
submission_completed_ = submission_fence_->GetCompletedValue();
|
submission_completed_ = submission_fence_->GetCompletedValue();
|
||||||
}
|
}
|
||||||
// Update the completed frame index, also obtaining the actual completed
|
if (submission_completed_ <= submission_completed_before) {
|
||||||
// frame number (since the CPU may be actually less than 3 frames behind)
|
// Not updated - no need to reclaim or download things.
|
||||||
// before reclaiming resources tracked with the frame number.
|
return;
|
||||||
frame_completed_ =
|
|
||||||
std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames;
|
|
||||||
for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
|
|
||||||
++frame) {
|
|
||||||
if (closed_frame_submissions_[frame % kQueueFrames] >
|
|
||||||
submission_completed_) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
frame_completed_ = frame;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reclaim command allocators.
|
// Reclaim command allocators.
|
||||||
|
@ -1898,6 +1881,46 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
||||||
}
|
}
|
||||||
buffers_for_deletion_.erase(buffers_for_deletion_.begin(), erase_buffers_end);
|
buffers_for_deletion_.erase(buffers_for_deletion_.begin(), erase_buffers_end);
|
||||||
|
|
||||||
|
shared_memory_->CompletedSubmissionUpdated();
|
||||||
|
|
||||||
|
render_target_cache_->CompletedSubmissionUpdated();
|
||||||
|
|
||||||
|
primitive_converter_->CompletedSubmissionUpdated();
|
||||||
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
||||||
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
|
SCOPE_profile_cpu_f("gpu");
|
||||||
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
|
bool is_opening_frame = is_guest_command && !frame_open_;
|
||||||
|
if (submission_open_ && !is_opening_frame) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the fence - needed for all kinds of submissions (to reclaim transient
|
||||||
|
// resources early) and specifically for frames (not to queue too many), and
|
||||||
|
// await the availability of the current frame.
|
||||||
|
CheckSubmissionFence(
|
||||||
|
is_opening_frame
|
||||||
|
? closed_frame_submissions_[frame_current_ % kQueueFrames]
|
||||||
|
: 0);
|
||||||
|
if (is_opening_frame) {
|
||||||
|
// Update the completed frame index, also obtaining the actual completed
|
||||||
|
// frame number (since the CPU may be actually less than 3 frames behind)
|
||||||
|
// before reclaiming resources tracked with the frame number.
|
||||||
|
frame_completed_ =
|
||||||
|
std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames;
|
||||||
|
for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
|
||||||
|
++frame) {
|
||||||
|
if (closed_frame_submissions_[frame % kQueueFrames] >
|
||||||
|
submission_completed_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
frame_completed_ = frame;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!submission_open_) {
|
if (!submission_open_) {
|
||||||
submission_open_ = true;
|
submission_open_ = true;
|
||||||
|
|
||||||
|
@ -1920,8 +1943,6 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
||||||
current_sampler_heap_ = nullptr;
|
current_sampler_heap_ = nullptr;
|
||||||
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||||
|
|
||||||
shared_memory_->BeginSubmission();
|
|
||||||
|
|
||||||
render_target_cache_->BeginSubmission();
|
render_target_cache_->BeginSubmission();
|
||||||
|
|
||||||
primitive_converter_->BeginSubmission();
|
primitive_converter_->BeginSubmission();
|
||||||
|
|
|
@ -229,6 +229,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
// frame. EndSubmission(true) will close the frame no matter whether the
|
// frame. EndSubmission(true) will close the frame no matter whether the
|
||||||
// submission has already been closed.
|
// submission has already been closed.
|
||||||
|
|
||||||
|
// Rechecks submission number and reclaims per-submission resources. Pass 0 as
|
||||||
|
// the submission to await to simply check status.
|
||||||
|
void CheckSubmissionFence(uint64_t await_submission);
|
||||||
// If is_guest_command is true, a new full frame - with full cleanup of
|
// If is_guest_command is true, a new full frame - with full cleanup of
|
||||||
// resources and, if needed, starting capturing - is opened if pending (as
|
// resources and, if needed, starting capturing - is opened if pending (as
|
||||||
// opposed to simply resuming after mid-frame synchronization).
|
// opposed to simply resuming after mid-frame synchronization).
|
||||||
|
|
|
@ -124,16 +124,18 @@ bool PrimitiveConverter::Initialize() {
|
||||||
static_ib_gpu_address_ = static_ib_->GetGPUVirtualAddress();
|
static_ib_gpu_address_ = static_ib_->GetGPUVirtualAddress();
|
||||||
|
|
||||||
memory_regions_invalidated_.store(0ull, std::memory_order_relaxed);
|
memory_regions_invalidated_.store(0ull, std::memory_order_relaxed);
|
||||||
physical_write_watch_handle_ =
|
memory_invalidation_callback_handle_ =
|
||||||
memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this);
|
memory_->RegisterPhysicalMemoryInvalidationCallback(
|
||||||
|
MemoryInvalidationCallbackThunk, this);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrimitiveConverter::Shutdown() {
|
void PrimitiveConverter::Shutdown() {
|
||||||
if (physical_write_watch_handle_ != nullptr) {
|
if (memory_invalidation_callback_handle_ != nullptr) {
|
||||||
memory_->UnregisterPhysicalWriteWatch(physical_write_watch_handle_);
|
memory_->UnregisterPhysicalMemoryInvalidationCallback(
|
||||||
physical_write_watch_handle_ = nullptr;
|
memory_invalidation_callback_handle_);
|
||||||
|
memory_invalidation_callback_handle_ = nullptr;
|
||||||
}
|
}
|
||||||
ui::d3d12::util::ReleaseAndNull(static_ib_);
|
ui::d3d12::util::ReleaseAndNull(static_ib_);
|
||||||
ui::d3d12::util::ReleaseAndNull(static_ib_upload_);
|
ui::d3d12::util::ReleaseAndNull(static_ib_upload_);
|
||||||
|
@ -142,24 +144,25 @@ void PrimitiveConverter::Shutdown() {
|
||||||
|
|
||||||
void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
|
void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
|
||||||
|
|
||||||
|
void PrimitiveConverter::CompletedSubmissionUpdated() {
|
||||||
|
if (static_ib_upload_ && command_processor_->GetCompletedSubmission() >=
|
||||||
|
static_ib_upload_submission_) {
|
||||||
|
// Completely uploaded - release the upload buffer.
|
||||||
|
static_ib_upload_->Release();
|
||||||
|
static_ib_upload_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void PrimitiveConverter::BeginSubmission() {
|
void PrimitiveConverter::BeginSubmission() {
|
||||||
// Got a command list now - upload and transition the static index buffer if
|
// Got a command list now - upload and transition the static index buffer if
|
||||||
// needed.
|
// needed.
|
||||||
if (static_ib_upload_) {
|
if (static_ib_upload_ && static_ib_upload_submission_ == UINT64_MAX) {
|
||||||
if (static_ib_upload_submission_ == UINT64_MAX) {
|
|
||||||
// Not uploaded yet - upload.
|
|
||||||
command_processor_->GetDeferredCommandList()->D3DCopyResource(
|
command_processor_->GetDeferredCommandList()->D3DCopyResource(
|
||||||
static_ib_, static_ib_upload_);
|
static_ib_, static_ib_upload_);
|
||||||
command_processor_->PushTransitionBarrier(
|
command_processor_->PushTransitionBarrier(
|
||||||
static_ib_, D3D12_RESOURCE_STATE_COPY_DEST,
|
static_ib_, D3D12_RESOURCE_STATE_COPY_DEST,
|
||||||
D3D12_RESOURCE_STATE_INDEX_BUFFER);
|
D3D12_RESOURCE_STATE_INDEX_BUFFER);
|
||||||
static_ib_upload_submission_ = command_processor_->GetCurrentSubmission();
|
static_ib_upload_submission_ = command_processor_->GetCurrentSubmission();
|
||||||
} else if (command_processor_->GetCompletedSubmission() >=
|
|
||||||
static_ib_upload_submission_) {
|
|
||||||
// Completely uploaded - release the upload buffer.
|
|
||||||
static_ib_upload_->Release();
|
|
||||||
static_ib_upload_ = nullptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -706,7 +709,7 @@ void* PrimitiveConverter::AllocateIndices(
|
||||||
return mapping + simd_offset;
|
return mapping + simd_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
||||||
// 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size.
|
// 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size.
|
||||||
uint32_t bit_index_first = physical_address_start >> 23;
|
uint32_t bit_index_first = physical_address_start >> 23;
|
||||||
|
@ -719,11 +722,12 @@ std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
|
||||||
return std::make_pair<uint32_t, uint32_t>(0, UINT32_MAX);
|
return std::make_pair<uint32_t, uint32_t>(0, UINT32_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallbackThunk(
|
std::pair<uint32_t, uint32_t>
|
||||||
|
PrimitiveConverter::MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range) {
|
bool exact_range) {
|
||||||
return reinterpret_cast<PrimitiveConverter*>(context_ptr)
|
return reinterpret_cast<PrimitiveConverter*>(context_ptr)
|
||||||
->MemoryWriteCallback(physical_address_start, length, exact_range);
|
->MemoryInvalidationCallback(physical_address_start, length, exact_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
||||||
|
|
|
@ -46,6 +46,7 @@ class PrimitiveConverter {
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
|
void CompletedSubmissionUpdated();
|
||||||
void BeginSubmission();
|
void BeginSubmission();
|
||||||
void BeginFrame();
|
void BeginFrame();
|
||||||
|
|
||||||
|
@ -83,7 +84,7 @@ class PrimitiveConverter {
|
||||||
uint32_t& index_count_out) const;
|
uint32_t& index_count_out) const;
|
||||||
|
|
||||||
// Callback for invalidating buffers mid-frame.
|
// Callback for invalidating buffers mid-frame.
|
||||||
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||||
|
|
||||||
void InitializeTrace();
|
void InitializeTrace();
|
||||||
|
@ -96,7 +97,7 @@ class PrimitiveConverter {
|
||||||
uint32_t simd_offset,
|
uint32_t simd_offset,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
|
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
|
||||||
|
|
||||||
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range);
|
bool exact_range);
|
||||||
|
|
||||||
|
@ -176,7 +177,7 @@ class PrimitiveConverter {
|
||||||
// the cache.
|
// the cache.
|
||||||
uint64_t memory_regions_used_;
|
uint64_t memory_regions_used_;
|
||||||
std::atomic<uint64_t> memory_regions_invalidated_ = 0;
|
std::atomic<uint64_t> memory_regions_invalidated_ = 0;
|
||||||
void* physical_write_watch_handle_ = nullptr;
|
void* memory_invalidation_callback_handle_ = nullptr;
|
||||||
uint32_t system_page_size_;
|
uint32_t system_page_size_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -455,12 +455,14 @@ void RenderTargetCache::ClearCache() {
|
||||||
edram_snapshot_restore_pool_.reset();
|
edram_snapshot_restore_pool_.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderTargetCache::BeginSubmission() {
|
void RenderTargetCache::CompletedSubmissionUpdated() {
|
||||||
if (edram_snapshot_restore_pool_) {
|
if (edram_snapshot_restore_pool_) {
|
||||||
edram_snapshot_restore_pool_->Reclaim(
|
edram_snapshot_restore_pool_->Reclaim(
|
||||||
command_processor_->GetCompletedSubmission());
|
command_processor_->GetCompletedSubmission());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void RenderTargetCache::BeginSubmission() {
|
||||||
// With the ROV, a submission does not always end in a resolve (for example,
|
// With the ROV, a submission does not always end in a resolve (for example,
|
||||||
// when memexport readback happens) or something else that would surely submit
|
// when memexport readback happens) or something else that would surely submit
|
||||||
// the UAV barrier, so we need to preserve the `current_` variables.
|
// the UAV barrier, so we need to preserve the `current_` variables.
|
||||||
|
@ -1417,7 +1419,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!shared_memory->MakeTilesResident(dest_modified_start,
|
if (!shared_memory->EnsureTilesResident(dest_modified_start,
|
||||||
dest_modified_length)) {
|
dest_modified_length)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -257,6 +257,7 @@ class RenderTargetCache {
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
|
void CompletedSubmissionUpdated();
|
||||||
void BeginSubmission();
|
void BeginSubmission();
|
||||||
void EndFrame();
|
void EndFrame();
|
||||||
// Called in the beginning of a draw call - may bind pipelines.
|
// Called in the beginning of a draw call - may bind pipelines.
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
|
@ -49,11 +50,6 @@ SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor,
|
||||||
trace_writer_(trace_writer) {
|
trace_writer_(trace_writer) {
|
||||||
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
||||||
page_count_ = kBufferSize >> page_size_log2_;
|
page_count_ = kBufferSize >> page_size_log2_;
|
||||||
uint32_t page_bitmap_length = page_count_ >> 6;
|
|
||||||
assert_true(page_bitmap_length != 0);
|
|
||||||
|
|
||||||
// Two interleaved bit arrays.
|
|
||||||
valid_and_gpu_written_pages_.resize(page_bitmap_length << 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SharedMemory::~SharedMemory() { Shutdown(); }
|
SharedMemory::~SharedMemory() { Shutdown(); }
|
||||||
|
@ -125,14 +121,16 @@ bool SharedMemory::Initialize() {
|
||||||
uint32_t(BufferDescriptorIndex::kRawUAV)),
|
uint32_t(BufferDescriptorIndex::kRawUAV)),
|
||||||
buffer_, kBufferSize);
|
buffer_, kBufferSize);
|
||||||
|
|
||||||
std::memset(valid_and_gpu_written_pages_.data(), 0,
|
system_page_flags_.clear();
|
||||||
valid_and_gpu_written_pages_.size() * sizeof(uint64_t));
|
system_page_flags_.resize((page_count_ + 63) / 64);
|
||||||
|
|
||||||
upload_buffer_pool_ =
|
upload_buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
|
||||||
std::make_unique<ui::d3d12::UploadBufferPool>(device, 4 * 1024 * 1024);
|
device,
|
||||||
|
xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_));
|
||||||
|
|
||||||
physical_write_watch_handle_ =
|
memory_invalidation_callback_handle_ =
|
||||||
memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this);
|
memory_->RegisterPhysicalMemoryInvalidationCallback(
|
||||||
|
MemoryInvalidationCallbackThunk, this);
|
||||||
|
|
||||||
ResetTraceGPUWrittenBuffer();
|
ResetTraceGPUWrittenBuffer();
|
||||||
|
|
||||||
|
@ -144,9 +142,10 @@ void SharedMemory::Shutdown() {
|
||||||
|
|
||||||
// TODO(Triang3l): Do something in case any watches are still registered.
|
// TODO(Triang3l): Do something in case any watches are still registered.
|
||||||
|
|
||||||
if (physical_write_watch_handle_ != nullptr) {
|
if (memory_invalidation_callback_handle_ != nullptr) {
|
||||||
memory_->UnregisterPhysicalWriteWatch(physical_write_watch_handle_);
|
memory_->UnregisterPhysicalMemoryInvalidationCallback(
|
||||||
physical_write_watch_handle_ = nullptr;
|
memory_invalidation_callback_handle_);
|
||||||
|
memory_invalidation_callback_handle_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
upload_buffer_pool_.reset();
|
upload_buffer_pool_.reset();
|
||||||
|
@ -165,7 +164,7 @@ void SharedMemory::Shutdown() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::BeginSubmission() {
|
void SharedMemory::CompletedSubmissionUpdated() {
|
||||||
upload_buffer_pool_->Reclaim(command_processor_->GetCompletedSubmission());
|
upload_buffer_pool_->Reclaim(command_processor_->GetCompletedSubmission());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,7 +272,7 @@ void SharedMemory::UnwatchMemoryRange(WatchHandle handle) {
|
||||||
UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle));
|
UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
|
bool SharedMemory::EnsureTilesResident(uint32_t start, uint32_t length) {
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
// Some texture is empty, for example - safe to draw in this case.
|
// Some texture is empty, for example - safe to draw in this case.
|
||||||
return true;
|
return true;
|
||||||
|
@ -347,7 +346,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
// Ensure all tile heaps are present.
|
// Ensure all tile heaps are present.
|
||||||
if (!MakeTilesResident(start, length)) {
|
if (!EnsureTilesResident(start, length)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -375,7 +374,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_;
|
uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_;
|
||||||
MakeRangeValid(upload_range_start, upload_buffer_pages, false);
|
MakeRangeValid(upload_range_start << page_size_log2_,
|
||||||
|
upload_buffer_pages << page_size_log2_, false);
|
||||||
std::memcpy(
|
std::memcpy(
|
||||||
upload_buffer_mapping,
|
upload_buffer_mapping,
|
||||||
memory_->TranslatePhysical(upload_range_start << page_size_log2_),
|
memory_->TranslatePhysical(upload_range_start << page_size_log2_),
|
||||||
|
@ -439,7 +439,7 @@ void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) {
|
||||||
|
|
||||||
// Mark the range as valid (so pages are not reuploaded until modified by the
|
// Mark the range as valid (so pages are not reuploaded until modified by the
|
||||||
// CPU) and watch it so the CPU can reuse it and this will be caught.
|
// CPU) and watch it so the CPU can reuse it and this will be caught.
|
||||||
MakeRangeValid(page_first, page_last - page_first + 1, true);
|
MakeRangeValid(start, length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SharedMemory::AreTiledResourcesUsed() const {
|
bool SharedMemory::AreTiledResourcesUsed() const {
|
||||||
|
@ -453,14 +453,15 @@ bool SharedMemory::AreTiledResourcesUsed() const {
|
||||||
provider->GetGraphicsAnalysis() == nullptr;
|
provider->GetGraphicsAnalysis() == nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
|
void SharedMemory::MakeRangeValid(uint32_t start, uint32_t length,
|
||||||
uint32_t valid_page_count,
|
|
||||||
bool written_by_gpu) {
|
bool written_by_gpu) {
|
||||||
if (valid_page_first >= page_count_ || valid_page_count == 0) {
|
if (length == 0 || start >= kBufferSize) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
valid_page_count = std::min(valid_page_count, page_count_ - valid_page_first);
|
length = std::min(length, kBufferSize - start);
|
||||||
uint32_t valid_page_last = valid_page_first + valid_page_count - 1;
|
uint32_t last = start + length - 1;
|
||||||
|
uint32_t valid_page_first = start >> page_size_log2_;
|
||||||
|
uint32_t valid_page_last = last >> page_size_log2_;
|
||||||
uint32_t valid_block_first = valid_page_first >> 6;
|
uint32_t valid_block_first = valid_page_first >> 6;
|
||||||
uint32_t valid_block_last = valid_page_last >> 6;
|
uint32_t valid_block_last = valid_page_last >> 6;
|
||||||
|
|
||||||
|
@ -475,18 +476,21 @@ void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
|
||||||
if (i == valid_block_last && (valid_page_last & 63) != 63) {
|
if (i == valid_block_last && (valid_page_last & 63) != 63) {
|
||||||
valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1;
|
valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1;
|
||||||
}
|
}
|
||||||
valid_and_gpu_written_pages_[i << 1] |= valid_bits;
|
SystemPageFlagsBlock& block = system_page_flags_[i];
|
||||||
|
block.valid |= valid_bits;
|
||||||
if (written_by_gpu) {
|
if (written_by_gpu) {
|
||||||
valid_and_gpu_written_pages_[(i << 1) + 1] |= valid_bits;
|
block.valid_and_gpu_written |= valid_bits;
|
||||||
} else {
|
} else {
|
||||||
valid_and_gpu_written_pages_[(i << 1) + 1] &= ~valid_bits;
|
block.valid_and_gpu_written &= ~valid_bits;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (physical_write_watch_handle_) {
|
if (memory_invalidation_callback_handle_) {
|
||||||
memory_->WatchPhysicalMemoryWrite(valid_page_first << page_size_log2_,
|
memory_->EnablePhysicalMemoryAccessCallbacks(
|
||||||
valid_page_count << page_size_log2_);
|
valid_page_first << page_size_log2_,
|
||||||
|
(valid_page_last - valid_page_first + 1) << page_size_log2_, true,
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -527,7 +531,7 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
||||||
|
|
||||||
uint32_t range_start = UINT32_MAX;
|
uint32_t range_start = UINT32_MAX;
|
||||||
for (uint32_t i = request_block_first; i <= request_block_last; ++i) {
|
for (uint32_t i = request_block_first; i <= request_block_last; ++i) {
|
||||||
uint64_t block_valid = valid_and_gpu_written_pages_[i << 1];
|
uint64_t block_valid = system_page_flags_[i].valid;
|
||||||
// Consider pages in the block outside the requested range valid.
|
// Consider pages in the block outside the requested range valid.
|
||||||
if (i == request_block_first) {
|
if (i == request_block_first) {
|
||||||
block_valid |= (1ull << (request_page_first & 63)) - 1;
|
block_valid |= (1ull << (request_page_first & 63)) - 1;
|
||||||
|
@ -569,17 +573,23 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallbackThunk(
|
std::pair<uint32_t, uint32_t> SharedMemory::MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range) {
|
bool exact_range) {
|
||||||
return reinterpret_cast<SharedMemory*>(context_ptr)
|
return reinterpret_cast<SharedMemory*>(context_ptr)
|
||||||
->MemoryWriteCallback(physical_address_start, length, exact_range);
|
->MemoryInvalidationCallback(physical_address_start, length, exact_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> SharedMemory::MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
||||||
|
if (length == 0 || physical_address_start >= kBufferSize) {
|
||||||
|
return std::make_pair(uint32_t(0), UINT32_MAX);
|
||||||
|
}
|
||||||
|
length = std::min(length, kBufferSize - physical_address_start);
|
||||||
|
uint32_t physical_address_last = physical_address_start + (length - 1);
|
||||||
|
|
||||||
uint32_t page_first = physical_address_start >> page_size_log2_;
|
uint32_t page_first = physical_address_start >> page_size_log2_;
|
||||||
uint32_t page_last = (physical_address_start + length - 1) >> page_size_log2_;
|
uint32_t page_last = physical_address_last >> page_size_log2_;
|
||||||
assert_true(page_first < page_count_ && page_last < page_count_);
|
assert_true(page_first < page_count_ && page_last < page_count_);
|
||||||
uint32_t block_first = page_first >> 6;
|
uint32_t block_first = page_first >> 6;
|
||||||
uint32_t block_last = page_last >> 6;
|
uint32_t block_last = page_last >> 6;
|
||||||
|
@ -596,14 +606,14 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
|
||||||
// frame, but with 256 KB it's 0.7 ms.
|
// frame, but with 256 KB it's 0.7 ms.
|
||||||
if (page_first & 63) {
|
if (page_first & 63) {
|
||||||
uint64_t gpu_written_start =
|
uint64_t gpu_written_start =
|
||||||
valid_and_gpu_written_pages_[(block_first << 1) + 1];
|
system_page_flags_[block_first].valid_and_gpu_written;
|
||||||
gpu_written_start &= (1ull << (page_first & 63)) - 1;
|
gpu_written_start &= (1ull << (page_first & 63)) - 1;
|
||||||
page_first =
|
page_first =
|
||||||
(page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start));
|
(page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start));
|
||||||
}
|
}
|
||||||
if ((page_last & 63) != 63) {
|
if ((page_last & 63) != 63) {
|
||||||
uint64_t gpu_written_end =
|
uint64_t gpu_written_end =
|
||||||
valid_and_gpu_written_pages_[(block_last << 1) + 1];
|
system_page_flags_[block_last].valid_and_gpu_written;
|
||||||
gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1);
|
gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1);
|
||||||
page_last = (page_last & ~uint32_t(63)) +
|
page_last = (page_last & ~uint32_t(63)) +
|
||||||
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1);
|
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1);
|
||||||
|
@ -618,8 +628,9 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
|
||||||
if (i == block_last && (page_last & 63) != 63) {
|
if (i == block_last && (page_last & 63) != 63) {
|
||||||
invalidate_bits &= (1ull << ((page_last & 63) + 1)) - 1;
|
invalidate_bits &= (1ull << ((page_last & 63) + 1)) - 1;
|
||||||
}
|
}
|
||||||
valid_and_gpu_written_pages_[i << 1] &= ~invalidate_bits;
|
SystemPageFlagsBlock& block = system_page_flags_[i];
|
||||||
valid_and_gpu_written_pages_[(i << 1) + 1] &= ~invalidate_bits;
|
block.valid &= ~invalidate_bits;
|
||||||
|
block.valid_and_gpu_written &= ~invalidate_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
FireWatches(page_first, page_last, false);
|
FireWatches(page_first, page_last, false);
|
||||||
|
@ -664,10 +675,11 @@ bool SharedMemory::InitializeTraceSubmitDownloads() {
|
||||||
auto global_lock = global_critical_region_.Acquire();
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
uint32_t fire_watches_range_start = UINT32_MAX;
|
uint32_t fire_watches_range_start = UINT32_MAX;
|
||||||
uint32_t gpu_written_range_start = UINT32_MAX;
|
uint32_t gpu_written_range_start = UINT32_MAX;
|
||||||
for (uint32_t i = 0; i * 2 < valid_and_gpu_written_pages_.size(); ++i) {
|
for (uint32_t i = 0; i < system_page_flags_.size(); ++i) {
|
||||||
uint64_t previously_valid_block = valid_and_gpu_written_pages_[i * 2];
|
SystemPageFlagsBlock& page_flags_block = system_page_flags_[i];
|
||||||
uint64_t gpu_written_block = valid_and_gpu_written_pages_[i * 2 + 1];
|
uint64_t previously_valid_block = page_flags_block.valid;
|
||||||
valid_and_gpu_written_pages_[i * 2] = gpu_written_block;
|
uint64_t gpu_written_block = page_flags_block.valid_and_gpu_written;
|
||||||
|
page_flags_block.valid = gpu_written_block;
|
||||||
|
|
||||||
// Fire watches on the invalidated pages.
|
// Fire watches on the invalidated pages.
|
||||||
uint64_t fire_watches_block = previously_valid_block & ~gpu_written_block;
|
uint64_t fire_watches_block = previously_valid_block & ~gpu_written_block;
|
||||||
|
@ -748,8 +760,8 @@ bool SharedMemory::InitializeTraceSubmitDownloads() {
|
||||||
&gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
&gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
||||||
IID_PPV_ARGS(&trace_gpu_written_buffer_)))) {
|
IID_PPV_ARGS(&trace_gpu_written_buffer_)))) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"Failed to create a %u KB GPU-written memory download buffer for frame "
|
"Shared memory: Failed to create a %u KB GPU-written memory download "
|
||||||
"tracing",
|
"buffer for frame tracing",
|
||||||
gpu_written_page_count << page_size_log2_ >> 10);
|
gpu_written_page_count << page_size_log2_ >> 10);
|
||||||
ResetTraceGPUWrittenBuffer();
|
ResetTraceGPUWrittenBuffer();
|
||||||
return false;
|
return false;
|
||||||
|
@ -761,7 +773,7 @@ bool SharedMemory::InitializeTraceSubmitDownloads() {
|
||||||
for (auto& gpu_written_submit_range : trace_gpu_written_ranges_) {
|
for (auto& gpu_written_submit_range : trace_gpu_written_ranges_) {
|
||||||
// For cases like resolution scale, when the data may not be actually
|
// For cases like resolution scale, when the data may not be actually
|
||||||
// written, just marked as valid.
|
// written, just marked as valid.
|
||||||
if (!MakeTilesResident(gpu_written_submit_range.first,
|
if (!EnsureTilesResident(gpu_written_submit_range.first,
|
||||||
gpu_written_submit_range.second)) {
|
gpu_written_submit_range.second)) {
|
||||||
gpu_written_submit_range.second = 0;
|
gpu_written_submit_range.second = 0;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
#define XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
#define XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -44,7 +43,7 @@ class SharedMemory {
|
||||||
return buffer_gpu_address_;
|
return buffer_gpu_address_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BeginSubmission();
|
void CompletedSubmissionUpdated();
|
||||||
|
|
||||||
typedef void (*GlobalWatchCallback)(void* context, uint32_t address_first,
|
typedef void (*GlobalWatchCallback)(void* context, uint32_t address_first,
|
||||||
uint32_t address_last,
|
uint32_t address_last,
|
||||||
|
@ -57,7 +56,7 @@ class SharedMemory {
|
||||||
// example, if the game changes protection level of a memory range containing
|
// example, if the game changes protection level of a memory range containing
|
||||||
// the watched range.
|
// the watched range.
|
||||||
//
|
//
|
||||||
// The callback is called with the mutex locked.
|
// The callback is called within the global critical region.
|
||||||
GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
|
GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
|
||||||
void* callback_context);
|
void* callback_context);
|
||||||
void UnregisterGlobalWatch(GlobalWatchHandle handle);
|
void UnregisterGlobalWatch(GlobalWatchHandle handle);
|
||||||
|
@ -84,15 +83,10 @@ class SharedMemory {
|
||||||
void* callback_data, uint64_t callback_argument);
|
void* callback_data, uint64_t callback_argument);
|
||||||
// Unregisters previously registered watched memory range.
|
// Unregisters previously registered watched memory range.
|
||||||
void UnwatchMemoryRange(WatchHandle handle);
|
void UnwatchMemoryRange(WatchHandle handle);
|
||||||
// Locks the mutex that gets locked when watch callbacks are invoked - must be
|
|
||||||
// done when checking variables that may be changed by a watch callback.
|
|
||||||
inline std::unique_lock<std::recursive_mutex> LockWatchMutex() {
|
|
||||||
return global_critical_region_.Acquire();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensures the buffer tiles backing the range are resident, but doesn't upload
|
// Ensures the buffer tiles backing the range are resident, but doesn't upload
|
||||||
// anything.
|
// anything.
|
||||||
bool MakeTilesResident(uint32_t start, uint32_t length);
|
bool EnsureTilesResident(uint32_t start, uint32_t length);
|
||||||
|
|
||||||
// Checks if the range has been updated, uploads new data if needed and
|
// Checks if the range has been updated, uploads new data if needed and
|
||||||
// ensures the buffer tiles backing the range are resident. May transition the
|
// ensures the buffer tiles backing the range are resident. May transition the
|
||||||
|
@ -105,7 +99,7 @@ class SharedMemory {
|
||||||
// (to up to the first GPU-written page, as an access violation exception
|
// (to up to the first GPU-written page, as an access violation exception
|
||||||
// count optimization) as modified by the CPU, also invalidating GPU-written
|
// count optimization) as modified by the CPU, also invalidating GPU-written
|
||||||
// pages directly in the range.
|
// pages directly in the range.
|
||||||
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||||
|
|
||||||
// Marks the range as containing GPU-generated data (such as resolves),
|
// Marks the range as containing GPU-generated data (such as resolves),
|
||||||
|
@ -141,8 +135,7 @@ class SharedMemory {
|
||||||
bool AreTiledResourcesUsed() const;
|
bool AreTiledResourcesUsed() const;
|
||||||
|
|
||||||
// Mark the memory range as updated and protect it.
|
// Mark the memory range as updated and protect it.
|
||||||
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count,
|
void MakeRangeValid(uint32_t start, uint32_t length, bool written_by_gpu);
|
||||||
bool written_by_gpu);
|
|
||||||
|
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
Memory* memory_;
|
Memory* memory_;
|
||||||
|
@ -154,6 +147,7 @@ class SharedMemory {
|
||||||
ID3D12Resource* buffer_ = nullptr;
|
ID3D12Resource* buffer_ = nullptr;
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
|
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
|
||||||
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
|
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
|
||||||
|
|
||||||
// Heaps are 4 MB, so not too many of them are allocated, but also not to
|
// Heaps are 4 MB, so not too many of them are allocated, but also not to
|
||||||
// waste too much memory for padding (with 16 MB there's too much).
|
// waste too much memory for padding (with 16 MB there's too much).
|
||||||
|
@ -166,9 +160,11 @@ class SharedMemory {
|
||||||
// Number of the heaps currently resident, for profiling.
|
// Number of the heaps currently resident, for profiling.
|
||||||
uint32_t heap_count_ = 0;
|
uint32_t heap_count_ = 0;
|
||||||
|
|
||||||
// Log2 of system page size.
|
// Log2 of invalidation granularity (the system page size, but the dependency
|
||||||
|
// on it is not hard - the access callback takes a range as an argument, and
|
||||||
|
// touched pages of the buffer of this size will be invalidated).
|
||||||
uint32_t page_size_log2_;
|
uint32_t page_size_log2_;
|
||||||
// Total physical page count.
|
// Total buffer page count.
|
||||||
uint32_t page_count_;
|
uint32_t page_count_;
|
||||||
|
|
||||||
// Non-shader-visible buffer descriptor heap for faster binding (via copying
|
// Non-shader-visible buffer descriptor heap for faster binding (via copying
|
||||||
|
@ -182,24 +178,46 @@ class SharedMemory {
|
||||||
ID3D12DescriptorHeap* buffer_descriptor_heap_ = nullptr;
|
ID3D12DescriptorHeap* buffer_descriptor_heap_ = nullptr;
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE buffer_descriptor_heap_start_;
|
D3D12_CPU_DESCRIPTOR_HANDLE buffer_descriptor_heap_start_;
|
||||||
|
|
||||||
// Handle of the physical memory write callback.
|
// First page and length in pages.
|
||||||
void* physical_write_watch_handle_ = nullptr;
|
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
||||||
|
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
||||||
|
// persistently allocated vector).
|
||||||
|
std::vector<UploadRange> upload_ranges_;
|
||||||
|
void GetRangesToUpload(uint32_t request_page_first,
|
||||||
|
uint32_t request_page_last);
|
||||||
|
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||||
|
|
||||||
// Mutex between the exception handler and the command processor, to be locked
|
// GPU-written memory downloading for traces.
|
||||||
// when checking or updating validity of pages/ranges.
|
// Start page, length in pages.
|
||||||
|
std::vector<std::pair<uint32_t, uint32_t>> trace_gpu_written_ranges_;
|
||||||
|
// Created temporarily, only for downloading.
|
||||||
|
ID3D12Resource* trace_gpu_written_buffer_ = nullptr;
|
||||||
|
void ResetTraceGPUWrittenBuffer();
|
||||||
|
|
||||||
|
void* memory_invalidation_callback_handle_ = nullptr;
|
||||||
|
void* memory_data_provider_handle_ = nullptr;
|
||||||
|
|
||||||
|
// Mutex between the guest memory subsystem and the command processor, to be
|
||||||
|
// locked when checking or updating validity of pages/ranges and when firing
|
||||||
|
// watches.
|
||||||
xe::global_critical_region global_critical_region_;
|
xe::global_critical_region global_critical_region_;
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// Things below should be protected by global_critical_region.
|
// Things below should be fully protected by global_critical_region.
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
// Bit vector containing:
|
struct SystemPageFlagsBlock {
|
||||||
// - Even block indices - whether physical memory system pages are up to date.
|
// Whether each page is up to date in the GPU buffer.
|
||||||
// - Odd block indices - whether phyical memory system pages contain data
|
uint64_t valid;
|
||||||
// written by the GPU not synchronized with the CPU (subset of valid pages).
|
// Subset of valid pages - whether each page in the GPU buffer contains data
|
||||||
std::vector<uint64_t> valid_and_gpu_written_pages_;
|
// that was written on the GPU, thus should not be invalidated spuriously.
|
||||||
|
uint64_t valid_and_gpu_written;
|
||||||
|
};
|
||||||
|
// Flags for each 64 system pages, interleaved as blocks, so bit scan can be
|
||||||
|
// used to quickly extract ranges.
|
||||||
|
std::vector<SystemPageFlagsBlock> system_page_flags_;
|
||||||
|
|
||||||
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range);
|
bool exact_range);
|
||||||
|
|
||||||
|
@ -259,30 +277,9 @@ class SharedMemory {
|
||||||
// watches.
|
// watches.
|
||||||
void FireWatches(uint32_t page_first, uint32_t page_last,
|
void FireWatches(uint32_t page_first, uint32_t page_last,
|
||||||
bool invalidated_by_gpu);
|
bool invalidated_by_gpu);
|
||||||
// Unlinks and frees the range and its nodes. Call this with the mutex locked.
|
// Unlinks and frees the range and its nodes. Call this in the global critical
|
||||||
|
// region.
|
||||||
void UnlinkWatchRange(WatchRange* range);
|
void UnlinkWatchRange(WatchRange* range);
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// Things above should be protected by global_critical_region.
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
// First page and length in pages.
|
|
||||||
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
|
||||||
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
|
||||||
// persistently allocated vector).
|
|
||||||
std::vector<UploadRange> upload_ranges_;
|
|
||||||
void GetRangesToUpload(uint32_t request_page_first,
|
|
||||||
uint32_t request_page_last);
|
|
||||||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
|
||||||
|
|
||||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
|
|
||||||
|
|
||||||
// GPU-written memory downloading for traces.
|
|
||||||
// Start page, length in pages.
|
|
||||||
std::vector<std::pair<uint32_t, uint32_t>> trace_gpu_written_ranges_;
|
|
||||||
// Created temporarily, only for downloading.
|
|
||||||
ID3D12Resource* trace_gpu_written_buffer_ = nullptr;
|
|
||||||
void ResetTraceGPUWrittenBuffer();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -1702,7 +1702,7 @@ void TextureCache::MarkRangeAsResolved(uint32_t start_unscaled,
|
||||||
uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
|
uint32_t page_last = (start_unscaled + length_unscaled - 1) >> 12;
|
||||||
uint32_t block_first = page_first >> 5;
|
uint32_t block_first = page_first >> 5;
|
||||||
uint32_t block_last = page_last >> 5;
|
uint32_t block_last = page_last >> 5;
|
||||||
auto watch_lock = shared_memory_->LockWatchMutex();
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
for (uint32_t i = block_first; i <= block_last; ++i) {
|
for (uint32_t i = block_first; i <= block_last; ++i) {
|
||||||
uint32_t add_bits = UINT32_MAX;
|
uint32_t add_bits = UINT32_MAX;
|
||||||
if (i == block_first) {
|
if (i == block_first) {
|
||||||
|
@ -1812,7 +1812,7 @@ bool TextureCache::TileResolvedTexture(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!shared_memory_->MakeTilesResident(texture_modified_start,
|
if (!shared_memory_->EnsureTilesResident(texture_modified_start,
|
||||||
texture_modified_length)) {
|
texture_modified_length)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -2404,7 +2404,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
// See what we need to upload.
|
// See what we need to upload.
|
||||||
bool base_in_sync, mips_in_sync;
|
bool base_in_sync, mips_in_sync;
|
||||||
{
|
{
|
||||||
auto watch_lock = shared_memory_->LockWatchMutex();
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
base_in_sync = texture->base_in_sync;
|
base_in_sync = texture->base_in_sync;
|
||||||
mips_in_sync = texture->mips_in_sync;
|
mips_in_sync = texture->mips_in_sync;
|
||||||
}
|
}
|
||||||
|
@ -2672,7 +2672,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
// regular texture or a vertex buffer, and thus the scaled resolve version is
|
// regular texture or a vertex buffer, and thus the scaled resolve version is
|
||||||
// not up to date anymore.
|
// not up to date anymore.
|
||||||
{
|
{
|
||||||
auto watch_lock = shared_memory_->LockWatchMutex();
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
texture->base_in_sync = true;
|
texture->base_in_sync = true;
|
||||||
texture->mips_in_sync = true;
|
texture->mips_in_sync = true;
|
||||||
if (!base_in_sync) {
|
if (!base_in_sync) {
|
||||||
|
@ -2761,7 +2761,7 @@ bool TextureCache::IsRangeScaledResolved(uint32_t start_unscaled,
|
||||||
uint32_t block_last = page_last >> 5;
|
uint32_t block_last = page_last >> 5;
|
||||||
uint32_t l2_block_first = block_first >> 6;
|
uint32_t l2_block_first = block_first >> 6;
|
||||||
uint32_t l2_block_last = block_last >> 6;
|
uint32_t l2_block_last = block_last >> 6;
|
||||||
auto watch_lock = shared_memory_->LockWatchMutex();
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
|
for (uint32_t i = l2_block_first; i <= l2_block_last; ++i) {
|
||||||
uint64_t l2_block = scaled_resolve_pages_l2_[i];
|
uint64_t l2_block = scaled_resolve_pages_l2_[i];
|
||||||
if (i == l2_block_first) {
|
if (i == l2_block_first) {
|
||||||
|
|
|
@ -11,9 +11,9 @@
|
||||||
#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <mutex>
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "xenia/base/mutex.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
|
@ -369,15 +369,14 @@ class TextureCache {
|
||||||
static constexpr uint32_t kCachedSRVDescriptorSwizzleMissing = UINT32_MAX;
|
static constexpr uint32_t kCachedSRVDescriptorSwizzleMissing = UINT32_MAX;
|
||||||
uint32_t cached_srv_descriptor_swizzle;
|
uint32_t cached_srv_descriptor_swizzle;
|
||||||
|
|
||||||
// Watch handles for the memory ranges (protected by the shared memory watch
|
// These are to be accessed within the global critical region to synchronize
|
||||||
// mutex).
|
// with shared memory.
|
||||||
|
// Watch handles for the memory ranges.
|
||||||
SharedMemory::WatchHandle base_watch_handle;
|
SharedMemory::WatchHandle base_watch_handle;
|
||||||
SharedMemory::WatchHandle mip_watch_handle;
|
SharedMemory::WatchHandle mip_watch_handle;
|
||||||
// Whether the recent base level data has been loaded from the memory
|
// Whether the recent base level data has been loaded from the memory.
|
||||||
// (protected by the shared memory watch mutex).
|
|
||||||
bool base_in_sync;
|
bool base_in_sync;
|
||||||
// Whether the recent mip data has been loaded from the memory (protected by
|
// Whether the recent mip data has been loaded from the memory.
|
||||||
// the shared memory watch mutex).
|
|
||||||
bool mips_in_sync;
|
bool mips_in_sync;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -620,16 +619,16 @@ class TextureCache {
|
||||||
kScaledResolveHeapSizeLog2] = {};
|
kScaledResolveHeapSizeLog2] = {};
|
||||||
// Number of currently resident portions of the tiled buffer, for profiling.
|
// Number of currently resident portions of the tiled buffer, for profiling.
|
||||||
uint32_t scaled_resolve_heap_count_ = 0;
|
uint32_t scaled_resolve_heap_count_ = 0;
|
||||||
|
// Global watch for scaled resolve data invalidation.
|
||||||
|
SharedMemory::GlobalWatchHandle scaled_resolve_global_watch_handle_ = nullptr;
|
||||||
|
|
||||||
|
xe::global_critical_region global_critical_region_;
|
||||||
// Bit vector storing whether each 4 KB physical memory page contains scaled
|
// Bit vector storing whether each 4 KB physical memory page contains scaled
|
||||||
// resolve data. uint32_t rather than uint64_t because parts of it are sent to
|
// resolve data. uint32_t rather than uint64_t because parts of it are sent to
|
||||||
// shaders.
|
// shaders.
|
||||||
// PROTECTED BY THE SHARED MEMORY WATCH MUTEX!
|
|
||||||
uint32_t* scaled_resolve_pages_ = nullptr;
|
uint32_t* scaled_resolve_pages_ = nullptr;
|
||||||
// Second level of the bit vector for faster rejection of non-scaled textures.
|
// Second level of the bit vector for faster rejection of non-scaled textures.
|
||||||
// PROTECTED BY THE SHARED MEMORY WATCH MUTEX!
|
|
||||||
uint64_t scaled_resolve_pages_l2_[(512 << 20) >> (12 + 5 + 6)];
|
uint64_t scaled_resolve_pages_l2_[(512 << 20) >> (12 + 5 + 6)];
|
||||||
// Global watch for scaled resolve data invalidation.
|
|
||||||
SharedMemory::GlobalWatchHandle scaled_resolve_global_watch_handle_ = nullptr;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -150,16 +150,18 @@ VkResult TextureCache::Initialize() {
|
||||||
|
|
||||||
device_queue_ = device_->AcquireQueue(device_->queue_family_index());
|
device_queue_ = device_->AcquireQueue(device_->queue_family_index());
|
||||||
|
|
||||||
physical_write_watch_handle_ =
|
memory_invalidation_callback_handle_ =
|
||||||
memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this);
|
memory_->RegisterPhysicalMemoryInvalidationCallback(
|
||||||
|
MemoryInvalidationCallbackThunk, this);
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::Shutdown() {
|
void TextureCache::Shutdown() {
|
||||||
if (physical_write_watch_handle_ != nullptr) {
|
if (memory_invalidation_callback_handle_ != nullptr) {
|
||||||
memory_->UnregisterPhysicalWriteWatch(physical_write_watch_handle_);
|
memory_->UnregisterPhysicalMemoryInvalidationCallback(
|
||||||
physical_write_watch_handle_ = nullptr;
|
memory_invalidation_callback_handle_);
|
||||||
|
memory_invalidation_callback_handle_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device_queue_) {
|
if (device_queue_) {
|
||||||
|
@ -411,7 +413,7 @@ void TextureCache::WatchTexture(Texture* texture) {
|
||||||
texture->is_watched = true;
|
texture->is_watched = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
memory_->WatchPhysicalMemoryWrite(address, size);
|
memory_->EnablePhysicalMemoryAccessCallbacks(address, size, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::TextureTouched(Texture* texture) {
|
void TextureCache::TextureTouched(Texture* texture) {
|
||||||
|
@ -428,7 +430,7 @@ void TextureCache::TextureTouched(Texture* texture) {
|
||||||
texture->pending_invalidation = true;
|
texture->pending_invalidation = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> TextureCache::MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> TextureCache::MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
||||||
global_critical_region_.Acquire();
|
global_critical_region_.Acquire();
|
||||||
if (watched_textures_.empty()) {
|
if (watched_textures_.empty()) {
|
||||||
|
@ -468,11 +470,11 @@ std::pair<uint32_t, uint32_t> TextureCache::MemoryWriteCallback(
|
||||||
return std::make_pair(previous_end, next_start - previous_end);
|
return std::make_pair(previous_end, next_start - previous_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, uint32_t> TextureCache::MemoryWriteCallbackThunk(
|
std::pair<uint32_t, uint32_t> TextureCache::MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range) {
|
bool exact_range) {
|
||||||
return reinterpret_cast<TextureCache*>(context_ptr)
|
return reinterpret_cast<TextureCache*>(context_ptr)
|
||||||
->MemoryWriteCallback(physical_address_start, length, exact_range);
|
->MemoryInvalidationCallback(physical_address_start, length, exact_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCache::Texture* TextureCache::DemandResolveTexture(
|
TextureCache::Texture* TextureCache::DemandResolveTexture(
|
||||||
|
|
|
@ -147,9 +147,9 @@ class TextureCache {
|
||||||
|
|
||||||
void WatchTexture(Texture* texture);
|
void WatchTexture(Texture* texture);
|
||||||
void TextureTouched(Texture* texture);
|
void TextureTouched(Texture* texture);
|
||||||
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
std::pair<uint32_t, uint32_t> MemoryInvalidationCallback(
|
||||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||||
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range);
|
bool exact_range);
|
||||||
|
|
||||||
|
@ -220,7 +220,7 @@ class TextureCache {
|
||||||
std::unordered_map<uint64_t, Sampler*> samplers_;
|
std::unordered_map<uint64_t, Sampler*> samplers_;
|
||||||
std::list<Texture*> pending_delete_textures_;
|
std::list<Texture*> pending_delete_textures_;
|
||||||
|
|
||||||
void* physical_write_watch_handle_ = nullptr;
|
void* memory_invalidation_callback_handle_ = nullptr;
|
||||||
|
|
||||||
xe::global_critical_region global_critical_region_;
|
xe::global_critical_region global_critical_region_;
|
||||||
std::list<WatchedTexture> watched_textures_;
|
std::list<WatchedTexture> watched_textures_;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/memory.h"
|
#include "xenia/base/memory.h"
|
||||||
|
#include "xenia/base/mutex.h"
|
||||||
#include "xenia/cpu/processor.h"
|
#include "xenia/cpu/processor.h"
|
||||||
#include "xenia/kernel/kernel_state.h"
|
#include "xenia/kernel/kernel_state.h"
|
||||||
#include "xenia/kernel/util/shim_utils.h"
|
#include "xenia/kernel/util/shim_utils.h"
|
||||||
|
@ -172,8 +173,9 @@ dword_result_t NtReadFile(dword_t file_handle, dword_t event_handle,
|
||||||
// some games NtReadFile() directly into texture memory
|
// some games NtReadFile() directly into texture memory
|
||||||
auto heap = kernel_memory()->LookupHeap(buffer.guest_address());
|
auto heap = kernel_memory()->LookupHeap(buffer.guest_address());
|
||||||
if (heap && heap->IsGuestPhysicalHeap()) {
|
if (heap && heap->IsGuestPhysicalHeap()) {
|
||||||
kernel_memory()->TriggerWatches(buffer.guest_address(), buffer_length,
|
kernel_memory()->TriggerPhysicalMemoryCallbacks(
|
||||||
true, true);
|
xe::global_critical_region::AcquireDirect(), buffer.guest_address(),
|
||||||
|
buffer_length, true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Synchronous.
|
// Synchronous.
|
||||||
|
|
|
@ -11,7 +11,9 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/byte_stream.h"
|
#include "xenia/base/byte_stream.h"
|
||||||
#include "xenia/base/clock.h"
|
#include "xenia/base/clock.h"
|
||||||
#include "xenia/base/cvar.h"
|
#include "xenia/base/cvar.h"
|
||||||
|
@ -96,8 +98,8 @@ Memory::~Memory() {
|
||||||
// requests.
|
// requests.
|
||||||
mmio_handler_.reset();
|
mmio_handler_.reset();
|
||||||
|
|
||||||
for (auto physical_write_watch : physical_write_watches_) {
|
for (auto invalidation_callback : physical_memory_invalidation_callbacks_) {
|
||||||
delete physical_write_watch;
|
delete invalidation_callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
heaps_.v00000000.Dispose();
|
heaps_.v00000000.Dispose();
|
||||||
|
@ -433,13 +435,12 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
|
||||||
return mmio_handler_->LookupRange(virtual_address);
|
return mmio_handler_->LookupRange(virtual_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Memory::AccessViolationCallback(void* host_address, bool is_write) {
|
bool Memory::AccessViolationCallback(
|
||||||
if (!is_write) {
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
// TODO(Triang3l): Handle GPU readback.
|
void* host_address, bool is_write) {
|
||||||
return false;
|
// Access via physical_membase_ is special, when need to bypass everything
|
||||||
}
|
// (for instance, for a data provider to actually write the data) so only
|
||||||
// Access via physical_membase_ is special, when need to bypass everything,
|
// triggering callbacks on virtual memory regions.
|
||||||
// so only watching virtual memory regions.
|
|
||||||
if (reinterpret_cast<size_t>(host_address) <
|
if (reinterpret_cast<size_t>(host_address) <
|
||||||
reinterpret_cast<size_t>(virtual_membase_) ||
|
reinterpret_cast<size_t>(virtual_membase_) ||
|
||||||
reinterpret_cast<size_t>(host_address) >=
|
reinterpret_cast<size_t>(host_address) >=
|
||||||
|
@ -448,65 +449,79 @@ bool Memory::AccessViolationCallback(void* host_address, bool is_write) {
|
||||||
}
|
}
|
||||||
uint32_t virtual_address = HostToGuestVirtual(host_address);
|
uint32_t virtual_address = HostToGuestVirtual(host_address);
|
||||||
BaseHeap* heap = LookupHeap(virtual_address);
|
BaseHeap* heap = LookupHeap(virtual_address);
|
||||||
if (heap->IsGuestPhysicalHeap()) {
|
if (!heap->IsGuestPhysicalHeap()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Access violation callbacks from the guest are triggered when the global
|
||||||
|
// critical region mutex is locked once.
|
||||||
|
//
|
||||||
// Will be rounded to physical page boundaries internally, so just pass 1 as
|
// Will be rounded to physical page boundaries internally, so just pass 1 as
|
||||||
// the length - guaranteed not to cross page boundaries also.
|
// the length - guaranteed not to cross page boundaries also.
|
||||||
return static_cast<PhysicalHeap*>(heap)->TriggerWatches(virtual_address, 1,
|
auto physical_heap = static_cast<PhysicalHeap*>(heap);
|
||||||
is_write, false);
|
return physical_heap->TriggerCallbacks(std::move(global_lock_locked_once),
|
||||||
}
|
virtual_address, 1, is_write, false);
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Memory::AccessViolationCallbackThunk(void* context, void* host_address,
|
bool Memory::AccessViolationCallbackThunk(
|
||||||
bool is_write) {
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
|
void* context, void* host_address, bool is_write) {
|
||||||
return reinterpret_cast<Memory*>(context)->AccessViolationCallback(
|
return reinterpret_cast<Memory*>(context)->AccessViolationCallback(
|
||||||
host_address, is_write);
|
std::move(global_lock_locked_once), host_address, is_write);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Memory::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
bool Memory::TriggerPhysicalMemoryCallbacks(
|
||||||
bool is_write, bool unwatch_exact_range,
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
bool unprotect) {
|
uint32_t virtual_address, uint32_t length, bool is_write,
|
||||||
|
bool unwatch_exact_range, bool unprotect) {
|
||||||
BaseHeap* heap = LookupHeap(virtual_address);
|
BaseHeap* heap = LookupHeap(virtual_address);
|
||||||
if (heap->IsGuestPhysicalHeap()) {
|
if (heap->IsGuestPhysicalHeap()) {
|
||||||
return static_cast<PhysicalHeap*>(heap)->TriggerWatches(
|
auto physical_heap = static_cast<PhysicalHeap*>(heap);
|
||||||
virtual_address, length, is_write, unwatch_exact_range, unprotect);
|
return physical_heap->TriggerCallbacks(std::move(global_lock_locked_once),
|
||||||
|
virtual_address, length, is_write,
|
||||||
|
unwatch_exact_range, unprotect);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void* Memory::RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback,
|
void* Memory::RegisterPhysicalMemoryInvalidationCallback(
|
||||||
void* callback_context) {
|
PhysicalMemoryInvalidationCallback callback, void* callback_context) {
|
||||||
PhysicalWriteWatchEntry* entry = new PhysicalWriteWatchEntry;
|
auto entry = new std::pair<PhysicalMemoryInvalidationCallback, void*>(
|
||||||
entry->callback = callback;
|
callback, callback_context);
|
||||||
entry->callback_context = callback_context;
|
|
||||||
|
|
||||||
auto lock = global_critical_region_.Acquire();
|
auto lock = global_critical_region_.Acquire();
|
||||||
physical_write_watches_.push_back(entry);
|
physical_memory_invalidation_callbacks_.push_back(entry);
|
||||||
|
|
||||||
return entry;
|
return entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Memory::UnregisterPhysicalWriteWatch(void* watch_handle) {
|
void Memory::UnregisterPhysicalMemoryInvalidationCallback(
|
||||||
auto entry = reinterpret_cast<PhysicalWriteWatchEntry*>(watch_handle);
|
void* callback_handle) {
|
||||||
|
auto entry =
|
||||||
|
reinterpret_cast<std::pair<PhysicalMemoryInvalidationCallback, void*>*>(
|
||||||
|
callback_handle);
|
||||||
{
|
{
|
||||||
auto lock = global_critical_region_.Acquire();
|
auto lock = global_critical_region_.Acquire();
|
||||||
auto it = std::find(physical_write_watches_.begin(),
|
auto it = std::find(physical_memory_invalidation_callbacks_.begin(),
|
||||||
physical_write_watches_.end(), entry);
|
physical_memory_invalidation_callbacks_.end(), entry);
|
||||||
assert_false(it == physical_write_watches_.end());
|
assert_true(it != physical_memory_invalidation_callbacks_.end());
|
||||||
if (it != physical_write_watches_.end()) {
|
if (it != physical_memory_invalidation_callbacks_.end()) {
|
||||||
physical_write_watches_.erase(it);
|
physical_memory_invalidation_callbacks_.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete entry;
|
delete entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Memory::WatchPhysicalMemoryWrite(uint32_t physical_address,
|
void Memory::EnablePhysicalMemoryAccessCallbacks(
|
||||||
uint32_t length) {
|
uint32_t physical_address, uint32_t length,
|
||||||
// Watch independently in all three mappings.
|
bool enable_invalidation_notifications, bool enable_data_providers) {
|
||||||
heaps_.vA0000000.WatchPhysicalWrite(physical_address, length);
|
heaps_.vA0000000.EnableAccessCallbacks(physical_address, length,
|
||||||
heaps_.vC0000000.WatchPhysicalWrite(physical_address, length);
|
enable_invalidation_notifications,
|
||||||
heaps_.vE0000000.WatchPhysicalWrite(physical_address, length);
|
enable_data_providers);
|
||||||
|
heaps_.vC0000000.EnableAccessCallbacks(physical_address, length,
|
||||||
|
enable_invalidation_notifications,
|
||||||
|
enable_data_providers);
|
||||||
|
heaps_.vE0000000.EnableAccessCallbacks(physical_address, length,
|
||||||
|
enable_invalidation_notifications,
|
||||||
|
enable_data_providers);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
|
uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
|
||||||
|
@ -798,7 +813,8 @@ bool BaseHeap::Restore(ByteStream* stream) {
|
||||||
void BaseHeap::Reset() {
|
void BaseHeap::Reset() {
|
||||||
// TODO(DrChat): protect pages.
|
// TODO(DrChat): protect pages.
|
||||||
std::memset(page_table_.data(), 0, sizeof(PageEntry) * page_table_.size());
|
std::memset(page_table_.data(), 0, sizeof(PageEntry) * page_table_.size());
|
||||||
// TODO(Triang3l): Unwatch pages.
|
// TODO(Triang3l): Remove access callbacks from pages if this is a physical
|
||||||
|
// memory heap.
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BaseHeap::Alloc(uint32_t size, uint32_t alignment,
|
bool BaseHeap::Alloc(uint32_t size, uint32_t alignment,
|
||||||
|
@ -1313,9 +1329,7 @@ void PhysicalHeap::Initialize(Memory* memory, uint8_t* membase,
|
||||||
system_page_count_ =
|
system_page_count_ =
|
||||||
(heap_size_ /* already - 1 */ + host_address_offset + system_page_size_) /
|
(heap_size_ /* already - 1 */ + host_address_offset + system_page_size_) /
|
||||||
system_page_size_;
|
system_page_size_;
|
||||||
system_pages_watched_write_.resize((system_page_count_ + 63) / 64);
|
system_page_flags_.resize((system_page_count_ + 63) / 64);
|
||||||
std::memset(system_pages_watched_write_.data(), 0,
|
|
||||||
system_pages_watched_write_.size() * sizeof(uint64_t));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment,
|
bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment,
|
||||||
|
@ -1357,7 +1371,7 @@ bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect & kMemoryProtectWrite) {
|
if (protect & kMemoryProtectWrite) {
|
||||||
TriggerWatches(address, size, true, true, false);
|
TriggerCallbacks(std::move(global_lock), address, size, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
*out_address = address;
|
*out_address = address;
|
||||||
|
@ -1398,7 +1412,7 @@ bool PhysicalHeap::AllocFixed(uint32_t base_address, uint32_t size,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect & kMemoryProtectWrite) {
|
if (protect & kMemoryProtectWrite) {
|
||||||
TriggerWatches(address, size, true, true, false);
|
TriggerCallbacks(std::move(global_lock), address, size, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1443,7 +1457,7 @@ bool PhysicalHeap::AllocRange(uint32_t low_address, uint32_t high_address,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect & kMemoryProtectWrite) {
|
if (protect & kMemoryProtectWrite) {
|
||||||
TriggerWatches(address, size, true, true, false);
|
TriggerCallbacks(std::move(global_lock), address, size, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
*out_address = address;
|
*out_address = address;
|
||||||
|
@ -1477,7 +1491,7 @@ bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect,
|
||||||
// Only invalidate if making writable again, for simplicity - not when simply
|
// Only invalidate if making writable again, for simplicity - not when simply
|
||||||
// marking some range as immutable, for instance.
|
// marking some range as immutable, for instance.
|
||||||
if (protect & kMemoryProtectWrite) {
|
if (protect & kMemoryProtectWrite) {
|
||||||
TriggerWatches(address, size, true, true, false);
|
TriggerCallbacks(std::move(global_lock), address, size, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!parent_heap_->Protect(GetPhysicalAddress(address), size, protect,
|
if (!parent_heap_->Protect(GetPhysicalAddress(address), size, protect,
|
||||||
|
@ -1489,8 +1503,15 @@ bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect,
|
||||||
return BaseHeap::Protect(address, size, protect);
|
return BaseHeap::Protect(address, size, protect);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhysicalHeap::WatchPhysicalWrite(uint32_t physical_address,
|
void PhysicalHeap::EnableAccessCallbacks(uint32_t physical_address,
|
||||||
uint32_t length) {
|
uint32_t length,
|
||||||
|
bool enable_invalidation_notifications,
|
||||||
|
bool enable_data_providers) {
|
||||||
|
// TODO(Triang3l): Implement data providers.
|
||||||
|
assert_false(enable_data_providers);
|
||||||
|
if (!enable_invalidation_notifications && !enable_data_providers) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint32_t physical_address_offset = GetPhysicalAddress(heap_base_);
|
uint32_t physical_address_offset = GetPhysicalAddress(heap_base_);
|
||||||
if (physical_address < physical_address_offset) {
|
if (physical_address < physical_address_offset) {
|
||||||
if (physical_address_offset - physical_address >= length) {
|
if (physical_address_offset - physical_address >= length) {
|
||||||
|
@ -1516,28 +1537,61 @@ void PhysicalHeap::WatchPhysicalWrite(uint32_t physical_address,
|
||||||
system_page_last = std::min(system_page_last, system_page_count_ - 1);
|
system_page_last = std::min(system_page_last, system_page_count_ - 1);
|
||||||
assert_true(system_page_first <= system_page_last);
|
assert_true(system_page_first <= system_page_last);
|
||||||
|
|
||||||
auto global_lock = global_critical_region_.Acquire();
|
// Update callback flags for system pages and make their protection stricter
|
||||||
|
// if needed.
|
||||||
// Protect the pages and mark them as watched. Don't mark non-writable pages
|
xe::memory::PageAccess protect_access =
|
||||||
// as watched, so true access violations can still occur there.
|
enable_data_providers ? xe::memory::PageAccess::kNoAccess
|
||||||
|
: xe::memory::PageAccess::kReadOnly;
|
||||||
uint8_t* protect_base = membase_ + heap_base_;
|
uint8_t* protect_base = membase_ + heap_base_;
|
||||||
uint32_t protect_system_page_first = UINT32_MAX;
|
uint32_t protect_system_page_first = UINT32_MAX;
|
||||||
|
auto global_lock = global_critical_region_.Acquire();
|
||||||
for (uint32_t i = system_page_first; i <= system_page_last; ++i) {
|
for (uint32_t i = system_page_first; i <= system_page_last; ++i) {
|
||||||
uint64_t page_bit = uint64_t(1) << (i & 63);
|
// Check if need to enable callbacks for the page and raise its protection.
|
||||||
// Check if need to allow writing to this page.
|
//
|
||||||
bool add_page_to_watch =
|
// If enabling invalidation notifications:
|
||||||
(system_pages_watched_write_[i >> 6] & page_bit) == 0;
|
// - Page writable and not watched for changes yet - protect and enable
|
||||||
if (add_page_to_watch) {
|
// invalidation notifications.
|
||||||
uint32_t page_number =
|
// - Page seen as writable by the guest, but only needs data providers -
|
||||||
xe::sat_sub(i * system_page_size_, host_address_offset()) /
|
// just set the bits to enable invalidation notifications (already has
|
||||||
page_size_;
|
// even stricter protection than needed).
|
||||||
if (ToPageAccess(page_table_[page_number].current_protect) !=
|
// - Page not writable as requested by the game - don't do anything (need
|
||||||
xe::memory::PageAccess::kReadWrite) {
|
// real access violations here).
|
||||||
add_page_to_watch = false;
|
// If enabling data providers:
|
||||||
|
// - Page accessible (either read/write or read-only) and didn't need data
|
||||||
|
// providers initially - protect and enable data providers.
|
||||||
|
// - Otherwise - do nothing.
|
||||||
|
//
|
||||||
|
// It's safe not to await data provider completion here before protecting as
|
||||||
|
// this never makes protection lighter, so it can't interfere with page
|
||||||
|
// faults that await data providers.
|
||||||
|
//
|
||||||
|
// Enabling data providers doesn't need to be deferred - providers will be
|
||||||
|
// polled for the last time without releasing the lock.
|
||||||
|
SystemPageFlagsBlock& page_flags_block = system_page_flags_[i >> 6];
|
||||||
|
uint64_t page_flags_bit = uint64_t(1) << (i & 63);
|
||||||
|
uint32_t guest_page_number =
|
||||||
|
xe::sat_sub(i * system_page_size_, host_address_offset()) / page_size_;
|
||||||
|
xe::memory::PageAccess current_page_access =
|
||||||
|
ToPageAccess(page_table_[guest_page_number].current_protect);
|
||||||
|
bool protect_system_page = false;
|
||||||
|
// Don't do anything with inaccessible pages - don't protect, don't enable
|
||||||
|
// callbacks - because real access violations are needed there. And don't
|
||||||
|
// enable invalidation notifications for read-only pages for the same
|
||||||
|
// reason.
|
||||||
|
if (current_page_access != xe::memory::PageAccess::kNoAccess) {
|
||||||
|
// TODO(Triang3l): Enable data providers.
|
||||||
|
if (enable_invalidation_notifications) {
|
||||||
|
if (current_page_access != xe::memory::PageAccess::kReadOnly &&
|
||||||
|
(page_flags_block.notify_on_invalidation & page_flags_bit) == 0) {
|
||||||
|
// TODO(Triang3l): Check if data providers are already enabled.
|
||||||
|
// If data providers are already enabled for the page, it has even
|
||||||
|
// stricter protection.
|
||||||
|
protect_system_page = true;
|
||||||
|
page_flags_block.notify_on_invalidation |= page_flags_bit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (add_page_to_watch) {
|
}
|
||||||
system_pages_watched_write_[i >> 6] |= page_bit;
|
if (protect_system_page) {
|
||||||
if (protect_system_page_first == UINT32_MAX) {
|
if (protect_system_page_first == UINT32_MAX) {
|
||||||
protect_system_page_first = i;
|
protect_system_page_first = i;
|
||||||
}
|
}
|
||||||
|
@ -1546,7 +1600,7 @@ void PhysicalHeap::WatchPhysicalWrite(uint32_t physical_address,
|
||||||
xe::memory::Protect(
|
xe::memory::Protect(
|
||||||
protect_base + protect_system_page_first * system_page_size_,
|
protect_base + protect_system_page_first * system_page_size_,
|
||||||
(i - protect_system_page_first) * system_page_size_,
|
(i - protect_system_page_first) * system_page_size_,
|
||||||
xe::memory::PageAccess::kReadOnly);
|
protect_access);
|
||||||
protect_system_page_first = UINT32_MAX;
|
protect_system_page_first = UINT32_MAX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1555,13 +1609,14 @@ void PhysicalHeap::WatchPhysicalWrite(uint32_t physical_address,
|
||||||
xe::memory::Protect(
|
xe::memory::Protect(
|
||||||
protect_base + protect_system_page_first * system_page_size_,
|
protect_base + protect_system_page_first * system_page_size_,
|
||||||
(system_page_last + 1 - protect_system_page_first) * system_page_size_,
|
(system_page_last + 1 - protect_system_page_first) * system_page_size_,
|
||||||
xe::memory::PageAccess::kReadOnly);
|
protect_access);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
bool PhysicalHeap::TriggerCallbacks(
|
||||||
bool is_write, bool unwatch_exact_range,
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
bool unprotect) {
|
uint32_t virtual_address, uint32_t length, bool is_write,
|
||||||
|
bool unwatch_exact_range, bool unprotect) {
|
||||||
// TODO(Triang3l): Support read watches.
|
// TODO(Triang3l): Support read watches.
|
||||||
assert_true(is_write);
|
assert_true(is_write);
|
||||||
if (!is_write) {
|
if (!is_write) {
|
||||||
|
@ -1594,12 +1649,10 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
||||||
uint32_t block_index_first = system_page_first >> 6;
|
uint32_t block_index_first = system_page_first >> 6;
|
||||||
uint32_t block_index_last = system_page_last >> 6;
|
uint32_t block_index_last = system_page_last >> 6;
|
||||||
|
|
||||||
auto global_lock = global_critical_region_.Acquire();
|
|
||||||
|
|
||||||
// Check if watching any page, whether need to call the callback at all.
|
// Check if watching any page, whether need to call the callback at all.
|
||||||
bool any_watched = false;
|
bool any_watched = false;
|
||||||
for (uint32_t i = block_index_first; i <= block_index_last; ++i) {
|
for (uint32_t i = block_index_first; i <= block_index_last; ++i) {
|
||||||
uint64_t block = system_pages_watched_write_[i];
|
uint64_t block = system_page_flags_[i].notify_on_invalidation;
|
||||||
if (i == block_index_first) {
|
if (i == block_index_first) {
|
||||||
block &= ~((uint64_t(1) << (system_page_first & 63)) - 1);
|
block &= ~((uint64_t(1) << (system_page_first & 63)) - 1);
|
||||||
}
|
}
|
||||||
|
@ -1633,9 +1686,10 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
||||||
heap_size_ + 1 - (physical_address_start - physical_address_offset));
|
heap_size_ + 1 - (physical_address_start - physical_address_offset));
|
||||||
uint32_t unwatch_first = 0;
|
uint32_t unwatch_first = 0;
|
||||||
uint32_t unwatch_last = UINT32_MAX;
|
uint32_t unwatch_last = UINT32_MAX;
|
||||||
for (auto physical_write_watch : memory_->physical_write_watches_) {
|
for (auto invalidation_callback :
|
||||||
|
memory_->physical_memory_invalidation_callbacks_) {
|
||||||
std::pair<uint32_t, uint32_t> callback_unwatch_range =
|
std::pair<uint32_t, uint32_t> callback_unwatch_range =
|
||||||
physical_write_watch->callback(physical_write_watch->callback_context,
|
invalidation_callback->first(invalidation_callback->second,
|
||||||
physical_address_start, physical_length,
|
physical_address_start, physical_length,
|
||||||
unwatch_exact_range);
|
unwatch_exact_range);
|
||||||
if (!unwatch_exact_range) {
|
if (!unwatch_exact_range) {
|
||||||
|
@ -1682,13 +1736,13 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
||||||
uint32_t unprotect_system_page_first = UINT32_MAX;
|
uint32_t unprotect_system_page_first = UINT32_MAX;
|
||||||
for (uint32_t i = system_page_first; i <= system_page_last; ++i) {
|
for (uint32_t i = system_page_first; i <= system_page_last; ++i) {
|
||||||
// Check if need to allow writing to this page.
|
// Check if need to allow writing to this page.
|
||||||
bool unprotect_page = (system_pages_watched_write_[i >> 6] &
|
bool unprotect_page = (system_page_flags_[i >> 6].notify_on_invalidation &
|
||||||
(uint64_t(1) << (i & 63))) != 0;
|
(uint64_t(1) << (i & 63))) != 0;
|
||||||
if (unprotect_page) {
|
if (unprotect_page) {
|
||||||
uint32_t page_number =
|
uint32_t guest_page_number =
|
||||||
xe::sat_sub(i * system_page_size_, host_address_offset()) /
|
xe::sat_sub(i * system_page_size_, host_address_offset()) /
|
||||||
page_size_;
|
page_size_;
|
||||||
if (ToPageAccess(page_table_[page_number].current_protect) !=
|
if (ToPageAccess(page_table_[guest_page_number].current_protect) !=
|
||||||
xe::memory::PageAccess::kReadWrite) {
|
xe::memory::PageAccess::kReadWrite) {
|
||||||
unprotect_page = false;
|
unprotect_page = false;
|
||||||
}
|
}
|
||||||
|
@ -1725,7 +1779,7 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
||||||
if (i == block_index_last && (system_page_last & 63) != 63) {
|
if (i == block_index_last && (system_page_last & 63) != 63) {
|
||||||
mask |= ~((uint64_t(1) << ((system_page_last & 63) + 1)) - 1);
|
mask |= ~((uint64_t(1) << ((system_page_last & 63) + 1)) - 1);
|
||||||
}
|
}
|
||||||
system_pages_watched_write_[i] &= mask;
|
system_page_flags_[i].notify_on_invalidation &= mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -238,9 +239,13 @@ class PhysicalHeap : public BaseHeap {
|
||||||
bool Protect(uint32_t address, uint32_t size, uint32_t protect,
|
bool Protect(uint32_t address, uint32_t size, uint32_t protect,
|
||||||
uint32_t* old_protect = nullptr) override;
|
uint32_t* old_protect = nullptr) override;
|
||||||
|
|
||||||
void WatchPhysicalWrite(uint32_t physical_address, uint32_t length);
|
void EnableAccessCallbacks(uint32_t physical_address, uint32_t length,
|
||||||
|
bool enable_invalidation_notifications,
|
||||||
|
bool enable_data_providers);
|
||||||
// Returns true if any page in the range was watched.
|
// Returns true if any page in the range was watched.
|
||||||
bool TriggerWatches(uint32_t virtual_address, uint32_t length, bool is_write,
|
bool TriggerCallbacks(
|
||||||
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
|
uint32_t virtual_address, uint32_t length, bool is_write,
|
||||||
bool unwatch_exact_range, bool unprotect = true);
|
bool unwatch_exact_range, bool unprotect = true);
|
||||||
|
|
||||||
bool IsGuestPhysicalHeap() const override { return true; }
|
bool IsGuestPhysicalHeap() const override { return true; }
|
||||||
|
@ -251,8 +256,15 @@ class PhysicalHeap : public BaseHeap {
|
||||||
|
|
||||||
uint32_t system_page_size_;
|
uint32_t system_page_size_;
|
||||||
uint32_t system_page_count_;
|
uint32_t system_page_count_;
|
||||||
// Protected by global_critical_region.
|
|
||||||
std::vector<uint64_t> system_pages_watched_write_;
|
struct SystemPageFlagsBlock {
|
||||||
|
// Whether writing to each page should trigger invalidation
|
||||||
|
// callbacks.
|
||||||
|
uint64_t notify_on_invalidation;
|
||||||
|
};
|
||||||
|
// Protected by global_critical_region. Flags for each 64 system pages,
|
||||||
|
// interleaved as blocks, so bit scan can be used to quickly extract ranges.
|
||||||
|
std::vector<SystemPageFlagsBlock> system_page_flags_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Models the entire guest memory system on the console.
|
// Models the entire guest memory system on the console.
|
||||||
|
@ -347,63 +359,79 @@ class Memory {
|
||||||
// Gets the defined MMIO range for the given virtual address, if any.
|
// Gets the defined MMIO range for the given virtual address, if any.
|
||||||
cpu::MMIORange* LookupVirtualMappedRange(uint32_t virtual_address);
|
cpu::MMIORange* LookupVirtualMappedRange(uint32_t virtual_address);
|
||||||
|
|
||||||
|
// Physical memory access callbacks, two types of them.
|
||||||
|
//
|
||||||
|
// This is simple per-system-page protection without reference counting or
|
||||||
|
// stored ranges. Whenever a watched page is accessed, all callbacks for it
|
||||||
|
// are triggered. Also the only way to remove callbacks is to trigger them
|
||||||
|
// somehow. Since there are no references from pages to individual callbacks,
|
||||||
|
// there's no way to disable only a specific callback for a page. Also
|
||||||
|
// callbacks may be triggered spuriously, and handlers should properly ignore
|
||||||
|
// pages they don't care about.
|
||||||
|
//
|
||||||
|
// Once callbacks are triggered for a page, the page is not watched anymore
|
||||||
|
// until requested again later. It is, however, unwatched only in one guest
|
||||||
|
// view of physical memory (because different views may have different
|
||||||
|
// protection for the same memory) - but it's rare when the same memory is
|
||||||
|
// used with different guest page sizes, and it's okay to fire a callback more
|
||||||
|
// than once.
|
||||||
|
//
|
||||||
|
// Only accessing the guest virtual memory views of physical memory triggers
|
||||||
|
// callbacks - data providers, for instance, must write to the host physical
|
||||||
|
// heap directly, otherwise their threads may infinitely await themselves.
|
||||||
|
//
|
||||||
|
// - Invalidation notifications:
|
||||||
|
//
|
||||||
|
// Protecting from writing. One-shot callbacks for invalidation of various
|
||||||
|
// kinds of physical memory caches (such as the GPU copy of the memory).
|
||||||
|
//
|
||||||
|
// May be triggered for a single page (in case of a write access violation or
|
||||||
|
// when need to synchronize data given by data providers) or for multiple
|
||||||
|
// pages (like when memory is allocated).
|
||||||
|
//
|
||||||
|
// Since granularity of callbacks is one single page, an invalidation
|
||||||
|
// notification handler must invalidate all the data stored in the touched
|
||||||
|
// pages.
|
||||||
|
//
|
||||||
|
// Because large ranges (like whole framebuffers) may be written to and
|
||||||
|
// exceptions are expensive, it's better to unprotect multiple pages as a
|
||||||
|
// result of a write access violation, so the shortest common range returned
|
||||||
|
// by all the invalidation callbacks (clamped to a sane range and also not to
|
||||||
|
// touch pages with provider callbacks) is unprotected.
|
||||||
|
//
|
||||||
|
// - Data providers:
|
||||||
|
//
|
||||||
|
// TODO(Triang3l): Implement data providers - more complicated because they
|
||||||
|
// will need to be able to release the global lock.
|
||||||
|
|
||||||
// Returns start and length of the smallest physical memory region surrounding
|
// Returns start and length of the smallest physical memory region surrounding
|
||||||
// the watched region that can be safely unwatched, if it doesn't matter,
|
// the watched region that can be safely unwatched, if it doesn't matter,
|
||||||
// return (0, UINT32_MAX).
|
// return (0, UINT32_MAX).
|
||||||
typedef std::pair<uint32_t, uint32_t> (*PhysicalWriteWatchCallback)(
|
typedef std::pair<uint32_t, uint32_t> (*PhysicalMemoryInvalidationCallback)(
|
||||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||||
bool exact_range);
|
bool exact_range);
|
||||||
|
// Returns a handle for unregistering or for skipping one notification handler
|
||||||
|
// while triggering data providers.
|
||||||
|
void* RegisterPhysicalMemoryInvalidationCallback(
|
||||||
|
PhysicalMemoryInvalidationCallback callback, void* callback_context);
|
||||||
|
// Unregisters a physical memory invalidation callback previously added with
|
||||||
|
// RegisterPhysicalMemoryInvalidationCallback.
|
||||||
|
void UnregisterPhysicalMemoryInvalidationCallback(void* callback_handle);
|
||||||
|
|
||||||
// Physical memory write watching, allowing subsystems to invalidate cached
|
// Enables physical memory access callbacks for the specified memory range,
|
||||||
// data that depends on memory contents.
|
// snapped to system page boundaries.
|
||||||
//
|
void EnablePhysicalMemoryAccessCallbacks(
|
||||||
// Placing a watch simply marks the pages (of the system page size) as
|
uint32_t physical_address, uint32_t length,
|
||||||
// watched, individual watched ranges (or which specific subscribers are
|
bool enable_invalidation_notifications, bool enable_data_providers);
|
||||||
// watching specific pages) are not stored. Because of this, callbacks may be
|
|
||||||
// triggered multiple times for a single range, and for any watched page every
|
|
||||||
// registered callback is triggered. This is a very simple one-shot method
|
|
||||||
// for use primarily for cache invalidation - there may be spurious firing,
|
|
||||||
// for example, if the game only makes the pages writable without actually
|
|
||||||
// writing anything (done for simplicity).
|
|
||||||
//
|
|
||||||
// A range of pages can be watched at any time, but pages are only unwatched
|
|
||||||
// when watches are triggered (since multiple subscribers can depend on the
|
|
||||||
// same memory, and one subscriber shouldn't interfere with another).
|
|
||||||
//
|
|
||||||
// Callbacks can be triggered for one page (if the guest just stores words) or
|
|
||||||
// for multiple pages (for file reading, making pages writable).
|
|
||||||
//
|
|
||||||
// Only guest physical memory mappings are watched - the host-only mapping is
|
|
||||||
// not protected so it can be used to bypass the write protection (for file
|
|
||||||
// reads, for example - in this case, watches are triggered manually).
|
|
||||||
//
|
|
||||||
// Note that when a watch is triggered, the watched page is unprotected only
|
|
||||||
// in the heap where the address is located. Since different virtual memory
|
|
||||||
// mappings of physical memory can have different protection levels for the
|
|
||||||
// same pages, and watches must not be placed on read-only or totally
|
|
||||||
// inaccessible pages, there are significant difficulties with synchronizing
|
|
||||||
// all the three ranges, but it's generally not needed.
|
|
||||||
void* RegisterPhysicalWriteWatch(PhysicalWriteWatchCallback callback,
|
|
||||||
void* callback_context);
|
|
||||||
|
|
||||||
// Unregisters a physical memory write watch previously added with
|
|
||||||
// RegisterPhysicalWriteWatch.
|
|
||||||
void UnregisterPhysicalWriteWatch(void* watch_handle);
|
|
||||||
|
|
||||||
// Enables watching of the specified memory range, snapped to system page
|
|
||||||
// boundaries. When something is written to a watched range (or when the
|
|
||||||
// protection of it changes in a way that it becomes writable), the
|
|
||||||
// registered watch callbacks are triggered for the page (or pages, for file
|
|
||||||
// reads and protection changes) where something has been written to. This
|
|
||||||
// protects physical memory only under virtual_membase_, so writing to
|
|
||||||
// physical_membase_ can be done to bypass the protection placed by the
|
|
||||||
// watches.
|
|
||||||
void WatchPhysicalMemoryWrite(uint32_t physical_address, uint32_t length);
|
|
||||||
|
|
||||||
// Forces triggering of watch callbacks for a virtual address range if pages
|
// Forces triggering of watch callbacks for a virtual address range if pages
|
||||||
// are watched there and unwatching them. Returns whether any page was
|
// are watched there and unwatching them. Returns whether any page was
|
||||||
// watched.
|
// watched. Must be called with global critical region locking depth of 1.
|
||||||
bool TriggerWatches(uint32_t virtual_address, uint32_t length, bool is_write,
|
// TODO(Triang3l): Implement data providers - this is why locking depth of 1
|
||||||
|
// will be required in the future.
|
||||||
|
bool TriggerPhysicalMemoryCallbacks(
|
||||||
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
|
uint32_t virtual_address, uint32_t length, bool is_write,
|
||||||
bool unwatch_exact_range, bool unprotect = true);
|
bool unwatch_exact_range, bool unprotect = true);
|
||||||
|
|
||||||
// Allocates virtual memory from the 'system' heap.
|
// Allocates virtual memory from the 'system' heap.
|
||||||
|
@ -443,9 +471,12 @@ class Memory {
|
||||||
static uint32_t HostToGuestVirtualThunk(const void* context,
|
static uint32_t HostToGuestVirtualThunk(const void* context,
|
||||||
const void* host_address);
|
const void* host_address);
|
||||||
|
|
||||||
bool AccessViolationCallback(void* host_address, bool is_write);
|
bool AccessViolationCallback(
|
||||||
static bool AccessViolationCallbackThunk(void* context, void* host_address,
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
bool is_write);
|
void* host_address, bool is_write);
|
||||||
|
static bool AccessViolationCallbackThunk(
|
||||||
|
std::unique_lock<std::recursive_mutex> global_lock_locked_once,
|
||||||
|
void* context, void* host_address, bool is_write);
|
||||||
|
|
||||||
std::wstring file_name_;
|
std::wstring file_name_;
|
||||||
uint32_t system_page_size_ = 0;
|
uint32_t system_page_size_ = 0;
|
||||||
|
@ -487,12 +518,9 @@ class Memory {
|
||||||
friend class BaseHeap;
|
friend class BaseHeap;
|
||||||
|
|
||||||
friend class PhysicalHeap;
|
friend class PhysicalHeap;
|
||||||
struct PhysicalWriteWatchEntry {
|
|
||||||
PhysicalWriteWatchCallback callback;
|
|
||||||
void* callback_context;
|
|
||||||
};
|
|
||||||
xe::global_critical_region global_critical_region_;
|
xe::global_critical_region global_critical_region_;
|
||||||
std::vector<PhysicalWriteWatchEntry*> physical_write_watches_;
|
std::vector<std::pair<PhysicalMemoryInvalidationCallback, void*>*>
|
||||||
|
physical_memory_invalidation_callbacks_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
Loading…
Reference in New Issue