[Memory] Pass exact_range to watch callbacks

This commit is contained in:
Triang3l 2019-08-03 19:16:04 +03:00
parent a0c92e30ce
commit 0370f8bbd9
6 changed files with 40 additions and 32 deletions

View File

@ -700,7 +700,7 @@ void* PrimitiveConverter::AllocateIndices(
} }
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback( std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length) { uint32_t physical_address_start, uint32_t length, bool exact_range) {
// 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size. // 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size.
uint32_t bit_index_first = physical_address_start >> 23; uint32_t bit_index_first = physical_address_start >> 23;
uint32_t bit_index_last = (physical_address_start + length - 1) >> 23; uint32_t bit_index_last = (physical_address_start + length - 1) >> 23;
@ -713,9 +713,10 @@ std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallback(
} }
// Static thunk: casts context_ptr back to PrimitiveConverter* and forwards
// physical_address_start, length and exact_range unchanged to the
// MemoryWriteCallback member, returning its result as-is.
// NOTE(review): the parameter list matches the PhysicalWriteWatchCallback
// typedef; presumably this is what gets registered as the watch callback -
// confirm at the registration site.
std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallbackThunk( std::pair<uint32_t, uint32_t> PrimitiveConverter::MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length) { void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range) {
return reinterpret_cast<PrimitiveConverter*>(context_ptr) return reinterpret_cast<PrimitiveConverter*>(context_ptr)
->MemoryWriteCallback(physical_address_start, length); ->MemoryWriteCallback(physical_address_start, length, exact_range);
} }
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer( D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(

View File

@ -90,9 +90,10 @@ class PrimitiveConverter {
// Callback for invalidating buffers mid-frame. // Callback for invalidating buffers mid-frame.
// The member callback takes the written physical range plus exact_range; the
// static thunk takes an additional context_ptr and forwards the other three
// arguments to the member.
// NOTE(review): how exact_range affects PrimitiveConverter's invalidation is
// not visible in this hunk - confirm against the implementation.
std::pair<uint32_t, uint32_t> MemoryWriteCallback( std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length); uint32_t physical_address_start, uint32_t length, bool exact_range);
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk( static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length); void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;

View File

@ -578,13 +578,14 @@ void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
} }
// Static thunk: casts context_ptr back to SharedMemory* and forwards
// physical_address_start, length and exact_range unchanged to the
// MemoryWriteCallback member, returning its result as-is.
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallbackThunk( std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length) { void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range) {
return reinterpret_cast<SharedMemory*>(context_ptr) return reinterpret_cast<SharedMemory*>(context_ptr)
->MemoryWriteCallback(physical_address_start, length); ->MemoryWriteCallback(physical_address_start, length, exact_range);
} }
std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback( std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length) { uint32_t physical_address_start, uint32_t length, bool exact_range) {
uint32_t page_first = physical_address_start >> page_size_log2_; uint32_t page_first = physical_address_start >> page_size_log2_;
uint32_t page_last = (physical_address_start + length - 1) >> page_size_log2_; uint32_t page_last = (physical_address_start + length - 1) >> page_size_log2_;
assert_true(page_first < page_count_ && page_last < page_count_); assert_true(page_first < page_count_ && page_last < page_count_);
@ -593,26 +594,28 @@ std::pair<uint32_t, uint32_t> SharedMemory::MemoryWriteCallback(
auto global_lock = global_critical_region_.Acquire(); auto global_lock = global_critical_region_.Acquire();
// Check if a somewhat wider range (up to 256 KB with 4 KB pages) can be if (!exact_range) {
// invalidated - if no GPU-written data nearby that was not intended to be // Check if a somewhat wider range (up to 256 KB with 4 KB pages) can be
// invalidated since it's not in sync with CPU memory and can't be reuploaded. // invalidated - if no GPU-written data nearby that was not intended to be
// It's a lot cheaper to upload some excess data than to catch access // invalidated since it's not in sync with CPU memory and can't be
// violations - with 4 KB callbacks, the original Doom runs at 4 FPS on // reuploaded. It's a lot cheaper to upload some excess data than to catch
// Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per // access violations - with 4 KB callbacks, the original Doom runs at 4 FPS
// frame, but with 256 KB it's 0.7 ms. // on Intel Core i7-3770, with 64 KB the CPU game code takes 3 ms to run per
if (page_first & 63) { // frame, but with 256 KB it's 0.7 ms.
uint64_t gpu_written_start = if (page_first & 63) {
valid_and_gpu_written_pages_[(block_first << 1) + 1]; uint64_t gpu_written_start =
gpu_written_start &= (1ull << (page_first & 63)) - 1; valid_and_gpu_written_pages_[(block_first << 1) + 1];
page_first = gpu_written_start &= (1ull << (page_first & 63)) - 1;
(page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start)); page_first =
} (page_first & ~uint32_t(63)) + (64 - xe::lzcnt(gpu_written_start));
if ((page_last & 63) != 63) { }
uint64_t gpu_written_end = if ((page_last & 63) != 63) {
valid_and_gpu_written_pages_[(block_last << 1) + 1]; uint64_t gpu_written_end =
gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1); valid_and_gpu_written_pages_[(block_last << 1) + 1];
page_last = (page_last & ~uint32_t(63)) + gpu_written_end &= ~((1ull << ((page_last & 63) + 1)) - 1);
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1); page_last = (page_last & ~uint32_t(63)) +
(std::max(xe::tzcnt(gpu_written_end), uint8_t(1)) - 1);
}
} }
for (uint32_t i = block_first; i <= block_last; ++i) { for (uint32_t i = block_first; i <= block_last; ++i) {

View File

@ -191,9 +191,10 @@ class SharedMemory {
// Memory access callback. // Memory access callback.
// exact_range: when false, MemoryWriteCallback may widen the invalidated page
// range within the first/last 64-page block, stopping at pages marked as
// GPU-written; when true, that widening step is skipped.
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk( static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length); void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);
std::pair<uint32_t, uint32_t> MemoryWriteCallback( std::pair<uint32_t, uint32_t> MemoryWriteCallback(
uint32_t physical_address_start, uint32_t length); uint32_t physical_address_start, uint32_t length, bool exact_range);
struct GlobalWatch { struct GlobalWatch {
GlobalWatchCallback callback; GlobalWatchCallback callback;

View File

@ -1643,7 +1643,8 @@ bool PhysicalHeap::TriggerWatches(uint32_t virtual_address, uint32_t length,
for (auto physical_write_watch : memory_->physical_write_watches_) { for (auto physical_write_watch : memory_->physical_write_watches_) {
std::pair<uint32_t, uint32_t> callback_unwatch_range = std::pair<uint32_t, uint32_t> callback_unwatch_range =
physical_write_watch->callback(physical_write_watch->callback_context, physical_write_watch->callback(physical_write_watch->callback_context,
physical_address_start, physical_length); physical_address_start, physical_length,
unwatch_exact_range);
if (!unwatch_exact_range) { if (!unwatch_exact_range) {
unwatch_first = std::max(unwatch_first, callback_unwatch_range.first); unwatch_first = std::max(unwatch_first, callback_unwatch_range.first);
unwatch_last = std::min( unwatch_last = std::min(

View File

@ -338,7 +338,8 @@ class Memory {
// the watched region that can be safely unwatched, if it doesn't matter, // the watched region that can be safely unwatched, if it doesn't matter,
// return (0, UINT32_MAX). // return (0, UINT32_MAX).
// exact_range is the unwatch_exact_range value passed through by
// PhysicalHeap::TriggerWatches; when it is true, TriggerWatches does not use
// the returned range to adjust the region it unwatches.
typedef std::pair<uint32_t, uint32_t> (*PhysicalWriteWatchCallback)( typedef std::pair<uint32_t, uint32_t> (*PhysicalWriteWatchCallback)(
void* context_ptr, uint32_t physical_address_start, uint32_t length); void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);
// Physical memory write watching, allowing subsystems to invalidate cached // Physical memory write watching, allowing subsystems to invalidate cached
// data that depends on memory contents. // data that depends on memory contents.