[D3D12] Upload data to shared memory during frame (fixes swaying palms in CoD4)
This commit is contained in:
parent
1cec143810
commit
c547851626
|
@ -454,11 +454,9 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
auto direct_queue = provider->GetDirectQueue();
|
||||
|
||||
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
|
||||
command_lists_setup_[i] = ui::d3d12::CommandList::Create(
|
||||
device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
command_lists_[i] = ui::d3d12::CommandList::Create(
|
||||
device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
if (command_lists_setup_[i] == nullptr || command_lists_[i] == nullptr) {
|
||||
if (command_lists_[i] == nullptr) {
|
||||
XELOGE("Failed to create the command lists");
|
||||
return false;
|
||||
}
|
||||
|
@ -532,7 +530,6 @@ void D3D12CommandProcessor::ShutdownContext() {
|
|||
|
||||
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
|
||||
command_lists_[i].reset();
|
||||
command_lists_setup_[i].reset();
|
||||
}
|
||||
|
||||
CommandProcessor::ShutdownContext();
|
||||
|
@ -752,7 +749,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Ensure vertex and index buffers are resident and draw.
|
||||
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
|
||||
// validity will be tracked.
|
||||
shared_memory_->UseForReading(command_list);
|
||||
uint64_t vertex_buffers_resident[2] = {};
|
||||
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
|
||||
uint32_t vfetch_index = vertex_binding.fetch_constant;
|
||||
|
@ -766,8 +762,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
XELOGGPU("Vertex fetch type is not 3!");
|
||||
return false;
|
||||
}
|
||||
shared_memory_->UseRange(regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||
shared_memory_->RequestRange(
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC, command_list);
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||
}
|
||||
if (indexed) {
|
||||
|
@ -777,7 +774,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
: sizeof(uint16_t);
|
||||
index_base &= ~(index_size - 1);
|
||||
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
||||
shared_memory_->UseRange(index_base, index_buffer_size);
|
||||
shared_memory_->RequestRange(index_base, index_buffer_size, command_list);
|
||||
|
||||
shared_memory_->UseForReading(command_list);
|
||||
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
|
||||
index_buffer_view.BufferLocation =
|
||||
shared_memory_->GetGPUAddress() + index_base;
|
||||
|
@ -788,6 +787,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
|
||||
} else {
|
||||
shared_memory_->UseForReading(command_list);
|
||||
command_list->DrawInstanced(index_count, 1, 0, 0);
|
||||
}
|
||||
|
||||
|
@ -841,7 +841,6 @@ bool D3D12CommandProcessor::BeginFrame() {
|
|||
draw_sampler_full_update_ = 0;
|
||||
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||
|
||||
command_lists_setup_[current_queue_frame_]->BeginRecording();
|
||||
command_lists_[current_queue_frame_]->BeginRecording();
|
||||
|
||||
constant_buffer_pool_->BeginFrame();
|
||||
|
@ -864,20 +863,11 @@ bool D3D12CommandProcessor::EndFrame() {
|
|||
|
||||
assert_false(scratch_buffer_used_);
|
||||
|
||||
auto command_list_setup = command_lists_setup_[current_queue_frame_].get();
|
||||
auto command_list = command_lists_[current_queue_frame_].get();
|
||||
|
||||
render_target_cache_->EndFrame();
|
||||
|
||||
bool setup_written = shared_memory_->EndFrame(
|
||||
command_list_setup->GetCommandList(), command_list->GetCommandList());
|
||||
shared_memory_->EndFrame();
|
||||
|
||||
if (setup_written) {
|
||||
command_list_setup->Execute();
|
||||
} else {
|
||||
command_list_setup->AbortRecording();
|
||||
}
|
||||
command_list->Execute();
|
||||
command_lists_[current_queue_frame_]->Execute();
|
||||
|
||||
sampler_heap_pool_->EndFrame();
|
||||
view_heap_pool_->EndFrame();
|
||||
|
|
|
@ -158,8 +158,6 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
bool cache_clear_requested_ = false;
|
||||
|
||||
std::unique_ptr<ui::d3d12::CommandList>
|
||||
command_lists_setup_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
|
||||
std::unique_ptr<ui::d3d12::CommandList>
|
||||
command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
|
||||
|
||||
|
|
|
@ -26,16 +26,10 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
|
|||
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
||||
page_count_ = kBufferSize >> page_size_log2_;
|
||||
uint32_t page_bitmap_length = page_count_ >> 6;
|
||||
uint32_t page_bitmap_l2_length = page_bitmap_length >> 6;
|
||||
assert_true(page_bitmap_l2_length > 0);
|
||||
|
||||
pages_in_sync_.resize(page_bitmap_length);
|
||||
assert_true(page_bitmap_length != 0);
|
||||
|
||||
valid_pages_.resize(page_bitmap_length);
|
||||
watched_pages_.resize(page_bitmap_length);
|
||||
watches_triggered_l1_.resize(page_bitmap_length);
|
||||
watches_triggered_l2_.resize(page_bitmap_l2_length);
|
||||
|
||||
upload_pages_.resize(page_bitmap_length);
|
||||
}
|
||||
|
||||
// Destructor: all teardown is delegated to Shutdown().
SharedMemory::~SharedMemory() {
  Shutdown();
}
|
||||
|
@ -79,15 +73,11 @@ bool SharedMemory::Initialize() {
|
|||
std::memset(heaps_, 0, sizeof(heaps_));
|
||||
heap_creation_failed_ = false;
|
||||
|
||||
std::memset(pages_in_sync_.data(), 0,
|
||||
pages_in_sync_.size() * sizeof(uint64_t));
|
||||
std::memset(valid_pages_.data(), 0, valid_pages_.size() * sizeof(uint64_t));
|
||||
|
||||
std::memset(watched_pages_.data(), 0,
|
||||
watched_pages_.size() * sizeof(uint64_t));
|
||||
std::memset(watches_triggered_l2_.data(), 0,
|
||||
watches_triggered_l2_.size() * sizeof(uint64_t));
|
||||
|
||||
std::memset(upload_pages_.data(), 0, upload_pages_.size() * sizeof(uint64_t));
|
||||
upload_buffer_pool_ =
|
||||
std::make_unique<ui::d3d12::UploadBufferPool>(context_, 4 * 1024 * 1024);
|
||||
|
||||
|
@ -118,157 +108,14 @@ void SharedMemory::Shutdown() {
|
|||
}
|
||||
|
||||
void SharedMemory::BeginFrame() {
|
||||
// Check triggered watches, clear them and mark modified pages as out of date.
|
||||
watch_mutex_.lock();
|
||||
for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
|
||||
uint64_t bits_l2 = watches_triggered_l2_[i];
|
||||
uint32_t index_l1_local;
|
||||
while (xe::bit_scan_forward(bits_l2, &index_l1_local)) {
|
||||
bits_l2 &= ~(1ull << index_l1_local);
|
||||
uint32_t index_l1_global = (i << 6) + index_l1_local;
|
||||
pages_in_sync_[index_l1_global] &=
|
||||
~(watches_triggered_l1_[index_l1_global]);
|
||||
}
|
||||
watches_triggered_l2_[i] = 0;
|
||||
}
|
||||
watch_mutex_.unlock();
|
||||
|
||||
upload_buffer_pool_->BeginFrame();
|
||||
|
||||
heap_creation_failed_ = false;
|
||||
}
|
||||
|
||||
bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
|
||||
ID3D12GraphicsCommandList* command_list_draw) {
|
||||
// Before drawing starts, it's assumed that the buffer is a copy destination.
|
||||
// This transition is for the next frame, not for the current one.
|
||||
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list_draw);
|
||||
// Ends the frame by closing the current frame of the upload buffer pool.
void SharedMemory::EndFrame() {
  upload_buffer_pool_->EndFrame();
}
|
||||
|
||||
auto current_frame = context_->GetCurrentFrame();
|
||||
auto device = context_->GetD3D12Provider()->GetDevice();
|
||||
|
||||
// Write ranges to upload buffers and submit them.
|
||||
uint32_t upload_end = 0, upload_range_start = 0, upload_range_length;
|
||||
while ((upload_range_start =
|
||||
NextUploadRange(upload_end, upload_range_length)) != UINT_MAX) {
|
||||
/* XELOGGPU(
|
||||
"Shared memory: Uploading %.8X-%.8X range",
|
||||
upload_range_start << page_size_log2_,
|
||||
((upload_range_start + upload_range_length) << page_size_log2_) - 1); */
|
||||
while (upload_range_length > 0) {
|
||||
ID3D12Resource* upload_buffer;
|
||||
uint32_t upload_buffer_offset, upload_buffer_size;
|
||||
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
||||
upload_range_length << page_size_log2_, &upload_buffer,
|
||||
&upload_buffer_offset, &upload_buffer_size, nullptr);
|
||||
if (upload_buffer_mapping == nullptr) {
|
||||
XELOGE("Shared memory: Failed to get an upload buffer");
|
||||
break;
|
||||
}
|
||||
std::memcpy(
|
||||
upload_buffer_mapping,
|
||||
memory_->TranslatePhysical(upload_range_start << page_size_log2_),
|
||||
upload_buffer_size);
|
||||
command_list_setup->CopyBufferRegion(
|
||||
buffer_, upload_range_start << page_size_log2_, upload_buffer,
|
||||
upload_buffer_offset, upload_buffer_size);
|
||||
upload_range_start += upload_buffer_size >> page_size_log2_;
|
||||
upload_range_length -= upload_buffer_size >> page_size_log2_;
|
||||
upload_end = upload_range_start;
|
||||
}
|
||||
if (upload_range_length > 0) {
|
||||
// Buffer creation or mapping failed.
|
||||
break;
|
||||
}
|
||||
}
|
||||
upload_buffer_pool_->EndFrame();
|
||||
|
||||
// Protect the uploaded ranges.
|
||||
// TODO(Triang3l): Add L2 or store ranges in a list - this may hold the mutex
|
||||
// for pretty long.
|
||||
if (upload_end != 0) {
|
||||
watch_mutex_.lock();
|
||||
uint32_t protect_end = 0, protect_start, protect_length;
|
||||
while ((protect_start = NextUploadRange(protect_end, protect_length)) !=
|
||||
UINT_MAX) {
|
||||
if (protect_start >= upload_end) {
|
||||
break;
|
||||
}
|
||||
protect_length = std::min(protect_length, upload_end - protect_start);
|
||||
uint32_t protect_last = protect_start + protect_length - 1;
|
||||
uint32_t protect_block_first = protect_start >> 6;
|
||||
uint32_t protect_block_last = protect_last >> 6;
|
||||
for (uint32_t i = protect_block_first; i <= protect_block_last; ++i) {
|
||||
uint64_t protect_bits = ~0ull;
|
||||
if (i == protect_block_first) {
|
||||
protect_bits &= ~((1ull << (protect_start & 63)) - 1);
|
||||
}
|
||||
if (i == protect_block_last && (protect_last & 63) != 63) {
|
||||
protect_bits &= (1ull << ((protect_last & 63) + 1)) - 1;
|
||||
}
|
||||
watched_pages_[i] |= protect_bits;
|
||||
}
|
||||
memory_->ProtectPhysicalMemory(
|
||||
protect_start << page_size_log2_, protect_length << page_size_log2_,
|
||||
cpu::MMIOHandler::WatchType::kWatchWrite, false);
|
||||
protect_end = protect_last + 1;
|
||||
if (protect_end >= upload_end) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
watch_mutex_.unlock();
|
||||
}
|
||||
|
||||
// Mark the newly uploaded ranges as uploaded.
|
||||
std::memset(upload_pages_.data(), 0, (upload_end >> 6) * sizeof(uint64_t));
|
||||
if (upload_end < page_count_) {
|
||||
upload_pages_[upload_end >> 6] &= ~((1ull << (upload_end & 63)) - 1);
|
||||
}
|
||||
|
||||
// If some upload failed, mark the pages not uploaded as out-of-date again
|
||||
// because they were marked as up-to-date when used as textures/buffers.
|
||||
if (upload_range_start != UINT_MAX) {
|
||||
for (uint32_t i = upload_end >> 6; i < upload_pages_.size(); ++i) {
|
||||
pages_in_sync_[i] &= ~(upload_pages_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return upload_end != 0;
|
||||
}
|
||||
|
||||
uint32_t SharedMemory::NextUploadRange(uint32_t search_start,
|
||||
uint32_t& length) const {
|
||||
uint32_t search_start_block_index = search_start >> 6;
|
||||
for (uint32_t i = search_start_block_index; i < upload_pages_.size(); ++i) {
|
||||
uint64_t start_block = upload_pages_[i];
|
||||
if (i == search_start_block_index) {
|
||||
// Exclude already visited pages in the first checked 64-page block.
|
||||
start_block &= ~((1ull << (search_start & 63)) - 1);
|
||||
}
|
||||
uint32_t start_page_local;
|
||||
if (!xe::bit_scan_forward(start_block, &start_page_local)) {
|
||||
continue;
|
||||
}
|
||||
// Found the beginning of a range - find the end.
|
||||
uint32_t start_page = (i << 6) + start_page_local;
|
||||
for (uint32_t j = i; j < upload_pages_.size(); ++j) {
|
||||
uint64_t end_block = upload_pages_[j];
|
||||
if (j == i) {
|
||||
end_block |= (1ull << start_page_local) - 1;
|
||||
}
|
||||
uint32_t end_page_local;
|
||||
if (xe::bit_scan_forward(~end_block, &end_page_local)) {
|
||||
length = ((j << 6) + end_page_local) - start_page;
|
||||
return start_page;
|
||||
}
|
||||
}
|
||||
length = page_count_ - start_page;
|
||||
return start_page;
|
||||
}
|
||||
return UINT_MAX;
|
||||
}
|
||||
|
||||
bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
||||
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||
ID3D12GraphicsCommandList* command_list) {
|
||||
if (length == 0) {
|
||||
// Some texture is empty, for example - safe to draw in this case.
|
||||
return true;
|
||||
|
@ -328,60 +175,161 @@ bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
|
|||
}
|
||||
}
|
||||
|
||||
// Mark the outdated tiles in this range as requiring upload, and also make
|
||||
// them up-to-date so textures aren't invalidated every use.
|
||||
// TODO(Triang3l): Invalidate textures referencing outdated pages.
|
||||
// Safe invalidate textures here because only actually used ranges will be
|
||||
// uploaded and marked as in-sync at the end of the frame.
|
||||
uint32_t page_first_index = start >> page_size_log2_;
|
||||
uint32_t page_last_index = last >> page_size_log2_;
|
||||
uint32_t block_first_index = page_first_index >> 6;
|
||||
uint32_t block_last_index = page_last_index >> 6;
|
||||
for (uint32_t i = block_first_index; i <= block_last_index; ++i) {
|
||||
uint64_t block_outdated = ~pages_in_sync_[i];
|
||||
if (i == block_first_index) {
|
||||
block_outdated &= ~((1ull << (page_first_index & 63)) - 1);
|
||||
// Upload and watch used ranges.
|
||||
GetRangesToUpload(start >> page_size_log2_,
|
||||
((start & ((1 << page_size_log2_) - 1)) + length +
|
||||
((1 << page_size_log2_) - 1)) >>
|
||||
page_size_log2_);
|
||||
if (upload_ranges_.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list);
|
||||
for (auto upload_range : upload_ranges_) {
|
||||
uint32_t upload_range_start = upload_range.first;
|
||||
uint32_t upload_range_length = upload_range.second;
|
||||
while (upload_range_length != 0) {
|
||||
XELOGGPU(
|
||||
"Shared memory: Uploading %.8X:%.8X",
|
||||
upload_range_start << page_size_log2_,
|
||||
((upload_range_start + upload_range_length) << page_size_log2_) - 1);
|
||||
ID3D12Resource* upload_buffer;
|
||||
uint32_t upload_buffer_offset, upload_buffer_size;
|
||||
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
||||
upload_range_length << page_size_log2_, &upload_buffer,
|
||||
&upload_buffer_offset, &upload_buffer_size, nullptr);
|
||||
if (upload_buffer_mapping == nullptr) {
|
||||
XELOGE("Shared memory: Failed to get an upload buffer");
|
||||
return false;
|
||||
}
|
||||
uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_;
|
||||
MakeRangeValid(upload_range_start, upload_buffer_pages);
|
||||
std::memcpy(
|
||||
upload_buffer_mapping,
|
||||
memory_->TranslatePhysical(upload_range_start << page_size_log2_),
|
||||
upload_buffer_size);
|
||||
command_list->CopyBufferRegion(
|
||||
buffer_, upload_range_start << page_size_log2_, upload_buffer,
|
||||
upload_buffer_offset, upload_buffer_size);
|
||||
upload_range_start += upload_buffer_pages;
|
||||
upload_range_length -= upload_buffer_pages;
|
||||
}
|
||||
if (i == block_last_index && (page_last_index & 63) != 63) {
|
||||
block_outdated &= (1ull << ((page_last_index & 63) + 1)) - 1;
|
||||
}
|
||||
pages_in_sync_[i] |= block_outdated;
|
||||
upload_pages_[i] |= block_outdated;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SharedMemory::MakeRangeValid(uint32_t valid_page_first,
|
||||
uint32_t valid_page_count) {
|
||||
if (valid_page_first >= page_count_ || valid_page_count == 0) {
|
||||
return;
|
||||
}
|
||||
valid_page_count = std::min(valid_page_count, page_count_ - valid_page_first);
|
||||
uint32_t valid_page_last = valid_page_first + valid_page_count - 1;
|
||||
uint32_t valid_block_first = valid_page_first >> 6;
|
||||
uint32_t valid_block_last = valid_page_last >> 6;
|
||||
|
||||
std::lock_guard<std::mutex> lock(validity_mutex_);
|
||||
|
||||
for (uint32_t i = valid_block_first; i <= valid_block_last; ++i) {
|
||||
uint64_t valid_bits = UINT64_MAX;
|
||||
if (i == valid_block_first) {
|
||||
valid_bits &= ~((1ull << (valid_page_first & 63)) - 1);
|
||||
}
|
||||
if (i == valid_block_last && (valid_page_last & 63) != 63) {
|
||||
valid_bits &= (1ull << ((valid_page_last & 63) + 1)) - 1;
|
||||
}
|
||||
valid_pages_[i] |= valid_bits;
|
||||
watched_pages_[i] |= valid_bits;
|
||||
}
|
||||
|
||||
memory_->ProtectPhysicalMemory(
|
||||
valid_page_first << page_size_log2_, valid_page_count << page_size_log2_,
|
||||
cpu::MMIOHandler::WatchType::kWatchWrite, false);
|
||||
}
|
||||
|
||||
void SharedMemory::GetRangesToUpload(uint32_t request_page_first,
|
||||
uint32_t request_page_count) {
|
||||
upload_ranges_.clear();
|
||||
if (request_page_first >= page_count_ || request_page_count == 0) {
|
||||
return;
|
||||
}
|
||||
request_page_count =
|
||||
std::min(request_page_count, page_count_ - request_page_first);
|
||||
uint32_t request_page_last = request_page_first + request_page_count - 1;
|
||||
uint32_t request_block_first = request_page_first >> 6;
|
||||
uint32_t request_block_last = request_page_last >> 6;
|
||||
|
||||
std::lock_guard<std::mutex> lock(validity_mutex_);
|
||||
|
||||
uint32_t range_start = UINT32_MAX;
|
||||
for (uint32_t i = request_block_first; i <= request_block_last; ++i) {
|
||||
uint64_t block_valid = valid_pages_[i];
|
||||
uint64_t block_invalid = ~block_valid;
|
||||
|
||||
// Ignore pages outside the requested range in bits scans completely.
|
||||
uint64_t bits_to_keep;
|
||||
if (i == request_block_first) {
|
||||
bits_to_keep = ~((1ull << (request_page_first & 63)) - 1);
|
||||
block_valid &= bits_to_keep;
|
||||
block_invalid &= bits_to_keep;
|
||||
}
|
||||
if (i == request_block_last && (request_page_last & 63) != 63) {
|
||||
bits_to_keep = (1ull << ((request_page_last & 63) + 1)) - 1;
|
||||
block_valid &= bits_to_keep;
|
||||
block_invalid &= bits_to_keep;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
uint32_t block_page;
|
||||
if (range_start == UINT32_MAX) {
|
||||
// Check if need to open a new range.
|
||||
if (!xe::bit_scan_forward(block_invalid, &block_page)) {
|
||||
break;
|
||||
}
|
||||
range_start = (i << 6) + block_page;
|
||||
} else {
|
||||
// Check if need to close the range.
|
||||
if (!xe::bit_scan_forward(block_valid, &block_page)) {
|
||||
break;
|
||||
}
|
||||
upload_ranges_.push_back(
|
||||
std::make_pair(range_start, (i << 6) + block_page - range_start));
|
||||
range_start = UINT32_MAX;
|
||||
}
|
||||
// There may be multiple ranges within a single block, so ignore the bits
|
||||
// that have already been processed.
|
||||
bits_to_keep = ~((1ull << block_page) - 1);
|
||||
block_valid &= bits_to_keep;
|
||||
block_invalid &= bits_to_keep;
|
||||
}
|
||||
}
|
||||
if (range_start != UINT32_MAX) {
|
||||
upload_ranges_.push_back(
|
||||
std::make_pair(range_start, request_page_last + 1 - range_start));
|
||||
}
|
||||
}
|
||||
|
||||
bool SharedMemory::WatchCallbackThunk(void* context_ptr, uint32_t address) {
|
||||
return reinterpret_cast<SharedMemory*>(context_ptr)->WatchCallback(address);
|
||||
}
|
||||
|
||||
bool SharedMemory::WatchCallback(uint32_t address) {
|
||||
address &= 0x1FFFFFFF;
|
||||
uint32_t page_index_l1_global = address >> page_size_log2_;
|
||||
uint32_t block_index_l1 = page_index_l1_global >> 6;
|
||||
uint64_t page_bit_l1 = 1ull << (page_index_l1_global & 63);
|
||||
uint32_t page_index = (address & kAddressMask) >> page_size_log2_;
|
||||
uint32_t block_index = page_index >> 6;
|
||||
uint64_t page_bit = 1ull << (page_index & 63);
|
||||
|
||||
std::lock_guard<std::mutex> lock(watch_mutex_);
|
||||
if (!(watched_pages_[block_index_l1] & page_bit_l1)) {
|
||||
std::lock_guard<std::mutex> lock(validity_mutex_);
|
||||
|
||||
if (!(watched_pages_[block_index] & page_bit)) {
|
||||
return false;
|
||||
}
|
||||
// XELOGGPU("Shared memory: Watch triggered for %.8X", address);
|
||||
|
||||
// Mark the page as modified.
|
||||
uint32_t block_index_l2 = block_index_l1 >> 6;
|
||||
uint64_t page_bit_l2 = 1ull << (block_index_l1 & 63);
|
||||
if (!(watches_triggered_l2_[block_index_l2] & page_bit_l2)) {
|
||||
watches_triggered_l2_[block_index_l2] |= page_bit_l2;
|
||||
// L1 is not cleared in BeginFrame, so clear it now.
|
||||
watches_triggered_l1_[block_index_l1] = 0;
|
||||
}
|
||||
watches_triggered_l1_[block_index_l1] |= page_bit_l1;
|
||||
valid_pages_[block_index] &= ~page_bit;
|
||||
// TODO(Triang3l): Invoke texture invalidation callbacks.
|
||||
|
||||
// Unprotect the page.
|
||||
memory_->UnprotectPhysicalMemory(page_index_l1_global << page_size_log2_,
|
||||
memory_->UnprotectPhysicalMemory(page_index << page_size_log2_,
|
||||
1 << page_size_log2_, false);
|
||||
watched_pages_[block_index_l1] &= ~page_bit_l1;
|
||||
watched_pages_[block_index] &= ~page_bit;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
@ -40,15 +41,15 @@ class SharedMemory {
|
|||
void BeginFrame();
|
||||
// Returns true if anything has been written to command_list_setup.
|
||||
// The draw command list is needed for the transition.
|
||||
bool EndFrame(ID3D12GraphicsCommandList* command_list_setup,
|
||||
ID3D12GraphicsCommandList* command_list_draw);
|
||||
void EndFrame();
|
||||
|
||||
// Marks the range as used in this frame, queues it for upload if it was
|
||||
// modified. Ensures the backing memory for the address range is present in
|
||||
// the tiled buffer, allocating if needed. If couldn't allocate, false is
|
||||
// returned - it's unsafe to use this portion (on tiled resources tier 1 at
|
||||
// least).
|
||||
bool UseRange(uint32_t start, uint32_t length);
|
||||
// Checks if the range has been updated, uploads new data if needed and
|
||||
// ensures the buffer tiles backing the range are resident. May transition the
|
||||
// tiled buffer to copy destination - call this before UseForReading or
|
||||
// UseForWriting. Returns true if the range has been fully updated and is
|
||||
// usable.
|
||||
bool RequestRange(uint32_t start, uint32_t length,
|
||||
ID3D12GraphicsCommandList* command_list);
|
||||
|
||||
// Makes the buffer usable for vertices, indices and texture untiling.
|
||||
void UseForReading(ID3D12GraphicsCommandList* command_list);
|
||||
|
@ -87,32 +88,30 @@ class SharedMemory {
|
|||
// Total physical page count.
|
||||
uint32_t page_count_;
|
||||
|
||||
// Mutex between the exception handler and the command processor, to be locked
|
||||
// when checking or updating validity of pages/ranges.
|
||||
std::mutex validity_mutex_;
|
||||
// Bit vector containing whether physical memory system pages are up to date.
|
||||
std::vector<uint64_t> pages_in_sync_;
|
||||
std::vector<uint64_t> valid_pages_;
|
||||
// Mark the memory range as updated and watch it.
|
||||
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count);
|
||||
|
||||
// First page and length in pages.
|
||||
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
||||
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
||||
// persistently allocated vector).
|
||||
std::vector<UploadRange> upload_ranges_;
|
||||
void GetRangesToUpload(uint32_t request_page_first,
|
||||
uint32_t request_page_count);
|
||||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||
|
||||
// Mutex for the watched pages and the triggered watches.
|
||||
std::mutex watch_mutex_;
|
||||
// Whether each physical page is watched by the GPU (after uploading).
|
||||
// Once a watch is triggered, it's not watched anymore.
|
||||
std::vector<uint64_t> watched_pages_;
|
||||
// Whether each page was modified while the current frame is being processed.
|
||||
// This is checked and cleared in the beginning of a GPU frame.
|
||||
// Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
|
||||
// so unmodified pages can be skipped quickly, and clearing is also fast.
|
||||
// On L1, each bit corresponds to a single page, on L2, to 64 pages.
|
||||
// Checking if L2 is non-zero before accessing L1 is REQUIRED since L1 is not
|
||||
// cleared!
|
||||
std::vector<uint64_t> watches_triggered_l1_;
|
||||
std::vector<uint64_t> watches_triggered_l2_;
|
||||
// Memory access callback.
|
||||
static bool WatchCallbackThunk(void* context_ptr, uint32_t address);
|
||||
bool WatchCallback(uint32_t address);
|
||||
|
||||
// Pages that need to be uploaded in this frame (that are used but modified).
|
||||
std::vector<uint64_t> upload_pages_;
|
||||
uint32_t NextUploadRange(uint32_t search_start, uint32_t& length) const;
|
||||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||
|
||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
||||
ID3D12GraphicsCommandList* command_list);
|
||||
};
|
||||
|
|
|
@ -723,14 +723,14 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
|
||||
// Request uploading of the texture data to the shared memory.
|
||||
if (!texture->base_in_sync) {
|
||||
if (!shared_memory_->UseRange(texture->key.base_page << 12,
|
||||
texture->base_size)) {
|
||||
if (!shared_memory_->RequestRange(texture->key.base_page << 12,
|
||||
texture->base_size, command_list)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!texture->mips_in_sync) {
|
||||
if (!shared_memory_->UseRange(texture->key.mip_page << 12,
|
||||
texture->mip_size)) {
|
||||
if (!shared_memory_->RequestRange(texture->key.mip_page << 12,
|
||||
texture->mip_size, command_list)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -230,8 +230,8 @@ class TextureCache {
|
|||
// made.
|
||||
Texture* FindOrCreateTexture(TextureKey key);
|
||||
|
||||
// Writes data from the shared memory to the texture. This binds pipelines and
|
||||
// allocates descriptors!
|
||||
// Writes data from the shared memory to the texture. This binds pipelines,
|
||||
// allocates descriptors and copies!
|
||||
bool LoadTextureData(Texture* texture);
|
||||
|
||||
// Makes all bindings invalid. Also requesting textures after calling this
|
||||
|
|
Loading…
Reference in New Issue