[D3D12] SHM functions needed for resolves

This commit is contained in:
Triang3l 2018-08-18 22:43:02 +03:00
parent 36cc19017a
commit e3a50a207d
2 changed files with 109 additions and 44 deletions

View File

@ -193,8 +193,7 @@ void SharedMemory::UnwatchMemoryRange(WatchHandle handle) {
UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle)); UnlinkWatchRange(reinterpret_cast<WatchRange*>(handle));
} }
bool SharedMemory::RequestRange(uint32_t start, uint32_t length, bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
ID3D12GraphicsCommandList* command_list) {
if (length == 0) { if (length == 0) {
// Some texture is empty, for example - safe to draw in this case. // Some texture is empty, for example - safe to draw in this case.
return true; return true;
@ -204,16 +203,13 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
// Exceeds the physical address space. // Exceeds the physical address space.
return false; return false;
} }
uint32_t last = start + length - 1;
#if FINE_GRAINED_DRAW_SCOPES if (!FLAGS_d3d12_tiled_resources) {
SCOPE_profile_cpu_f("gpu"); return true;
#endif // FINE_GRAINED_DRAW_SCOPES }
// Ensure all tile heaps are present.
if (FLAGS_d3d12_tiled_resources) {
uint32_t heap_first = start >> kHeapSizeLog2; uint32_t heap_first = start >> kHeapSizeLog2;
uint32_t heap_last = last >> kHeapSizeLog2; uint32_t heap_last = (start + length - 1) >> kHeapSizeLog2;
for (uint32_t i = heap_first; i <= heap_last; ++i) { for (uint32_t i = heap_first; i <= heap_last; ++i) {
if (heaps_[i] != nullptr) { if (heaps_[i] != nullptr) {
continue; continue;
@ -255,6 +251,29 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
&range_flags, &heap_range_start_offset, &range_tile_count, &range_flags, &heap_range_start_offset, &range_tile_count,
D3D12_TILE_MAPPING_FLAG_NONE); D3D12_TILE_MAPPING_FLAG_NONE);
} }
return true;
}
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
ID3D12GraphicsCommandList* command_list) {
if (length == 0) {
// Some texture is empty, for example - safe to draw in this case.
return true;
}
start &= kAddressMask;
if (start >= kBufferSize || (kBufferSize - start) < length) {
// Exceeds the physical address space.
return false;
}
uint32_t last = start + length - 1;
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
// Ensure all tile heaps are present.
if (!MakeTilesResident(start, length)) {
return false;
} }
// Upload and protect used ranges. // Upload and protect used ranges.
@ -296,6 +315,42 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
return true; return true;
} }
// Handles a GPU-side write (such as a resolve) to [start, start + length) of
// the shared memory buffer: fires watch callbacks for overlapping watched
// ranges and marks the written pages valid so they are not re-uploaded from
// CPU memory until the CPU modifies them again.
void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) {
  start &= kAddressMask;
  if (length == 0 || start >= kBufferSize) {
    // Nothing written, or the start is outside the physical address space.
    return;
  }
  // Clamp so the range does not run past the end of the buffer.
  length = std::min(length, kBufferSize - start);
  uint32_t end = start + length - 1;
  uint32_t page_first = start >> page_size_log2_;
  uint32_t page_last = end >> page_size_log2_;
  uint32_t bucket_first = start >> kWatchBucketSizeLog2;
  uint32_t bucket_last = end >> kWatchBucketSizeLog2;
  // Recursive mutex - UnlinkWatchRange and MakeRangeValid below also take it.
  std::lock_guard<std::recursive_mutex> lock(validity_mutex_);
  // Trigger modification callbacks so, for instance, resolved data is loaded to
  // the texture.
  for (uint32_t i = bucket_first; i <= bucket_last; ++i) {
    WatchNode* node = watch_buckets_[i];
    while (node != nullptr) {
      WatchRange* range = node->range;
      // Store the next node now since when the callback is triggered, the links
      // will be broken.
      node = node->bucket_node_next;
      // Page-granularity overlap test between the watch and the written range.
      if (page_first <= range->page_last && page_last >= range->page_first) {
        range->callback(range->callback_context, range->callback_data,
                        range->callback_argument);
        // One-shot watch - remove it after notifying.
        UnlinkWatchRange(range);
      }
    }
  }
  // Mark the range as valid (so pages are not reuploaded until modified by the
  // CPU) and protect it so the CPU can reuse it.
  MakeRangeValid(page_first, page_last - page_first + 1);
}
void SharedMemory::MakeRangeValid(uint32_t valid_page_first, void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
uint32_t valid_page_count) { uint32_t valid_page_count) {
if (valid_page_first >= page_count_ || valid_page_count == 0) { if (valid_page_first >= page_count_ || valid_page_count == 0) {

View File

@ -67,6 +67,10 @@ class SharedMemory {
inline void LockWatchMutex() { validity_mutex_.lock(); } inline void LockWatchMutex() { validity_mutex_.lock(); }
inline void UnlockWatchMutex() { validity_mutex_.unlock(); } inline void UnlockWatchMutex() { validity_mutex_.unlock(); }
// Ensures the buffer tiles backing the range are resident, but doesn't upload
// anything.
bool MakeTilesResident(uint32_t start, uint32_t length);
// Checks if the range has been updated, uploads new data if needed and // Checks if the range has been updated, uploads new data if needed and
// ensures the buffer tiles backing the range are resident. May transition the // ensures the buffer tiles backing the range are resident. May transition the
// tiled buffer to copy destination - call this before UseForReading or // tiled buffer to copy destination - call this before UseForReading or
@ -75,6 +79,12 @@ class SharedMemory {
bool RequestRange(uint32_t start, uint32_t length, bool RequestRange(uint32_t start, uint32_t length,
ID3D12GraphicsCommandList* command_list); ID3D12GraphicsCommandList* command_list);
// Marks the range as containing GPU-generated data (such as resolves),
// triggering modification callbacks, making it valid (so pages are not
// copied from the main memory until they're modified by the CPU) and
// protecting it.
void RangeWrittenByGPU(uint32_t start, uint32_t length);
// Makes the buffer usable for vertices, indices and texture untiling. // Makes the buffer usable for vertices, indices and texture untiling.
void UseForReading(ID3D12GraphicsCommandList* command_list); void UseForReading(ID3D12GraphicsCommandList* command_list);
// Makes the buffer usable for texture tiling after a resolve. // Makes the buffer usable for texture tiling after a resolve.