[Memory] Pass exact_range to watch callbacks
This commit is contained in:
parent
a0c92e30ce
commit
0370f8bbd9
|
@ -700,7 +700,7 @@ void* PrimitiveConverter::AllocateIndices(
|
|||
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
|
||||
uint32_t physical_address_start, uint32_t length) {
|
||||
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
||||
// 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size.
|
||||
uint32_t bit_index_first = physical_address_start >> 23;
|
||||
uint32_t bit_index_last = (physical_address_start + length - 1) >> 23;
|
||||
|
@ -713,9 +713,10 @@ std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
|
|||
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallbackThunk(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length) {
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range) {
|
||||
return reinterpret_cast<PrimitiveConverter*>(context_ptr)
|
||||
->MemoryWriteCallback(physical_address_start, length);
|
||||
->MemoryWriteCallback(physical_address_start, length, exact_range);
|
||||
}
|
||||
|
||||
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
||||
|
|
|
@ -90,9 +90,10 @@ class PrimitiveConverter {
|
|||
|
||||
// Callback for invalidating buffers mid-frame.
|
||||
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
||||
uint32_t physical_address_start, uint32_t length);
|
||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length);
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range);
|
||||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
|
|
|
@ -578,13 +578,14 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
|||
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallbackThunk(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length) {
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range) {
|
||||
return reinterpret_cast<SharedMemory*>(context_ptr)
|
||||
->MemoryWriteCallback(physical_address_start, length);
|
||||
->MemoryWriteCallback(physical_address_start, length, exact_range);
|
||||
}
|
||||
|
||||
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
|
||||
uint32_t physical_address_start, uint32_t length) {
|
||||
uint32_t physical_address_start, uint32_t length, bool exact_range) {
|
||||
uint32_t page_first = physical_address_start >> page_size_log2_;
|
||||
uint32_t page_last = (physical_address_start + length - 1) >> page_size_log2_;
|
||||
assert_true(page_first < page_count_ && page_last < page_count_);
|
||||
|
@ -593,26 +594,28 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
|
|||
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
|
||||
// Check if a somewhat wider range (up to 256 KB with 4 KB pages) can be
|
||||
// invalidated - if no GPU-written data nearby that was not intended to be
|
||||
// invalidated since it's not in sync with CPU memory and can't be reuploaded.
|
||||
// It's a lot cheaper to upload some excess data than to catch access
|
||||
// violations - with 4 KB callbacks, the original Doom runs at 4 FPS on
|
||||
// Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per
|
||||
// frame, but with 256 KB it's 0.7 ms.
|
||||
if (page_first & 63) {
|
||||
uint64_t gpu_written_start =
|
||||
valid_and_gpu_written_pages_[(block_first << 1) + 1];
|
||||
gpu_written_start &= (1ull << (page_first & 63)) - 1;
|
||||
page_first =
|
||||
(page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start));
|
||||
}
|
||||
if ((page_last & 63) != 63) {
|
||||
uint64_t gpu_written_end =
|
||||
valid_and_gpu_written_pages_[(block_last << 1) + 1];
|
||||
gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1);
|
||||
page_last = (page_last & ~uint32_t(63)) +
|
||||
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1);
|
||||
if (!exact_range) {
|
||||
// Check if a somewhat wider range (up to 256 KB with 4 KB pages) can be
|
||||
// invalidated - that is, if there is no nearby GPU-written data, which must
|
||||
// not be invalidated because it's not in sync with CPU memory and can't be
|
||||
// reuploaded. It's a lot cheaper to upload some excess data than to catch
|
||||
// access violations - with 4 KB callbacks, the original Doom runs at 4 FPS
|
||||
// on Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per
|
||||
// frame, but with 256 KB it's 0.7 ms.
|
||||
if (page_first & 63) {
|
||||
uint64_t gpu_written_start =
|
||||
valid_and_gpu_written_pages_[(block_first << 1) + 1];
|
||||
gpu_written_start &= (1ull << (page_first & 63)) - 1;
|
||||
page_first =
|
||||
(page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start));
|
||||
}
|
||||
if ((page_last & 63) != 63) {
|
||||
uint64_t gpu_written_end =
|
||||
valid_and_gpu_written_pages_[(block_last << 1) + 1];
|
||||
gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1);
|
||||
page_last = (page_last & ~uint32_t(63)) +
|
||||
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = block_first; i <= block_last; ++i) {
|
||||
|
|
|
@ -191,9 +191,10 @@ class SharedMemory {
|
|||
|
||||
// Memory access callback.
|
||||
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length);
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range);
|
||||
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
||||
uint32_t physical_address_start, uint32_t length);
|
||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||
|
||||
struct GlobalWatch {
|
||||
GlobalWatchCallback callback;
|
||||
|
|
|
@ -1643,7 +1643,8 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
|
|||
for (auto physical_write_watch : memory_->physical_write_watches_) {
|
||||
std::pair<uint32_t, uint32_t> callback_unwatch_range =
|
||||
physical_write_watch->callback(physical_write_watch->callback_context,
|
||||
physical_address_start, physical_length);
|
||||
physical_address_start, physical_length,
|
||||
unwatch_exact_range);
|
||||
if (!unwatch_exact_range) {
|
||||
unwatch_first = std::max(unwatch_first, callback_unwatch_range.first);
|
||||
unwatch_last = std::min(
|
||||
|
|
|
@ -338,7 +338,8 @@ class Memory {
|
|||
// the watched region that can be safely unwatched. If it doesn't matter,
|
||||
// return (0, UINT32_MAX).
|
||||
typedef std::pair<uint32_t, uint32_t> (*PhysicalWriteWatchCallback)(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length);
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range);
|
||||
|
||||
// Physical memory write watching, allowing subsystems to invalidate cached
|
||||
// data that depends on memory contents.
|
||||
|
|
Loading…
Reference in New Issue