[D3D12] CP: Framework for multiple command lists per frame

Triang3l 2019-10-31 23:32:37 +03:00
parent 6e5a0ebf7b
commit 9deb710607
12 changed files with 401 additions and 238 deletions
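The commit replaces the single fence value with two related counters: a submission index signaled through one fence at the end of every submission, and a guest frame index that remembers which submission closed each of the last kQueueFrames frames. A minimal, self-contained sketch of that bookkeeping follows (plain integers stand in for the ID3D12Fence, and the names are simplified, not the real members):

#include <algorithm>
#include <cstdint>

constexpr uint32_t kQueueFrames = 3;

struct SubmissionTracker {
  uint64_t submission_current = 1;    // Value signaled by the next submission.
  uint64_t submission_completed = 0;  // Latest value the GPU has passed.
  uint64_t frame_current = 1;
  uint64_t frame_completed = 0;
  // Which submission closed each of the last kQueueFrames frames.
  uint64_t closed_frame_submissions[kQueueFrames] = {};

  // EndSubmission with is_swap: the submission has just been signaled, so the
  // frame is closed by submission_current - 1.
  void CloseFrame() {
    closed_frame_submissions[frame_current % kQueueFrames] =
        submission_current - 1;
    ++frame_current;
  }

  // BeginSubmission when opening a frame: wait until the frame that used this
  // ring slot kQueueFrames frames ago has completed (the real code waits on
  // the fence event; the wait is modeled here by taking the max), then advance
  // frame_completed as far as the completed submission allows, since the CPU
  // may be fewer than kQueueFrames frames behind.
  void OpenFrame(uint64_t fence_completed_value) {
    submission_completed =
        std::max(fence_completed_value,
                 closed_frame_submissions[frame_current % kQueueFrames]);
    frame_completed =
        std::max(frame_current, uint64_t(kQueueFrames)) - kQueueFrames;
    for (uint64_t frame = frame_completed + 1; frame < frame_current;
         ++frame) {
      if (closed_frame_submissions[frame % kQueueFrames] >
          submission_completed) {
        break;
      }
      frame_completed = frame;
    }
  }
};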

View File

@ -55,7 +55,7 @@ namespace xe {
namespace gpu {
namespace d3d12 {
constexpr uint32_t D3D12CommandProcessor::kQueuedFrames;
constexpr uint32_t D3D12CommandProcessor::kQueueFrames;
constexpr uint32_t
D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth;
@ -441,7 +441,7 @@ uint64_t D3D12CommandProcessor::RequestViewDescriptors(
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
uint32_t descriptor_index;
uint64_t current_heap_index = view_heap_pool_->Request(
fence_current_value_, previous_heap_index, count_for_partial_update,
frame_current_, previous_heap_index, count_for_partial_update,
count_for_full_update, descriptor_index);
if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// There was an error.
@ -467,7 +467,7 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors(
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
uint32_t descriptor_index;
uint64_t current_heap_index = sampler_heap_pool_->Request(
fence_current_value_, previous_heap_index, count_for_partial_update,
frame_current_, previous_heap_index, count_for_partial_update,
count_for_full_update, descriptor_index);
if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// There was an error.
@ -520,7 +520,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
if (scratch_buffer_ != nullptr) {
BufferForDeletion buffer_for_deletion;
buffer_for_deletion.buffer = scratch_buffer_;
buffer_for_deletion.last_usage_fence_value = fence_current_value_;
buffer_for_deletion.last_usage_submission = submission_current_;
buffers_for_deletion_.push_back(buffer_for_deletion);
}
scratch_buffer_ = buffer;
@ -552,10 +552,7 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
if (cvars::d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM()) {
auto provider = GetD3D12Context()->GetD3D12Provider();
auto tier = provider->GetProgrammableSamplePositionsTier();
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
if (tier >= 2 &&
command_lists_[command_list_index]->GetCommandList1() != nullptr) {
if (tier >= 2) {
// Depth buffer transitions are affected by sample positions.
SubmitBarriers();
// Standard sample positions in Direct3D 10.1, but adjusted to take the
@ -611,13 +608,14 @@ void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
}
}
void D3D12CommandProcessor::UnbindRenderTargets() {
render_target_cache_->UnbindRenderTargets();
void D3D12CommandProcessor::FlushAndUnbindRenderTargets() {
render_target_cache_->FlushAndUnbindRenderTargets();
}
void D3D12CommandProcessor::SetExternalGraphicsPipeline(
ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor,
bool reset_stencil_ref) {
ID3D12PipelineState* pipeline, bool changing_rts_and_sample_positions,
bool changing_viewport, bool changing_blend_factor,
bool changing_stencil_ref) {
if (current_external_pipeline_ != pipeline) {
deferred_command_list_->D3DSetPipelineState(pipeline);
current_external_pipeline_ = pipeline;
@ -626,14 +624,17 @@ void D3D12CommandProcessor::SetExternalGraphicsPipeline(
current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0;
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
if (reset_viewport) {
if (changing_rts_and_sample_positions) {
render_target_cache_->ForceApplyOnNextUpdate();
}
if (changing_viewport) {
ff_viewport_update_needed_ = true;
ff_scissor_update_needed_ = true;
}
if (reset_blend_factor) {
if (changing_blend_factor) {
ff_blend_factor_update_needed_ = true;
}
if (reset_stencil_ref) {
if (changing_stencil_ref) {
ff_stencil_ref_update_needed_ = true;
}
}
@ -672,7 +673,9 @@ std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
deferred_command_list_->CopyTexture(location_dest, location_source);
PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
EndSubmission(false);
if (!EndSubmission(false)) {
return nullptr;
}
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = swap_texture_copy_footprint_.Offset;
@ -711,27 +714,30 @@ bool D3D12CommandProcessor::SetupContext() {
auto device = provider->GetDevice();
auto direct_queue = provider->GetDirectQueue();
submission_open_ = false;
submission_current_ = 1;
submission_completed_ = 0;
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
IID_PPV_ARGS(&fence_)))) {
XELOGE("Failed to create the fence");
IID_PPV_ARGS(&submission_fence_)))) {
XELOGE("Failed to create the submission fence");
return false;
}
fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr);
if (fence_completion_event_ == nullptr) {
XELOGE("Failed to create the fence completion event");
submission_fence_completion_event_ =
CreateEvent(nullptr, false, false, nullptr);
if (submission_fence_completion_event_ == nullptr) {
XELOGE("Failed to create the submission fence completion event");
return false;
}
fence_current_value_ = 1;
fence_completed_value_ = 0;
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
command_lists_[i] = ui::d3d12::CommandList::Create(
device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT);
if (command_lists_[i] == nullptr) {
XELOGE("Failed to create the command lists");
return false;
}
}
frame_open_ = false;
frame_current_ = 1;
frame_completed_ = 0;
std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));
command_list_writable_first_ = nullptr;
command_list_writable_last_ = nullptr;
command_list_submitted_first_ = nullptr;
command_list_submitted_last_ = nullptr;
deferred_command_list_ = std::make_unique<DeferredCommandList>(this);
constant_buffer_pool_ =
@ -806,9 +812,9 @@ bool D3D12CommandProcessor::SetupContext() {
return false;
}
// Get the layout for the upload buffer.
gamma_ramp_desc.DepthOrArraySize = kQueuedFrames;
gamma_ramp_desc.DepthOrArraySize = kQueueFrames;
UINT64 gamma_ramp_upload_size;
device->GetCopyableFootprints(&gamma_ramp_desc, 0, kQueuedFrames * 2, 0,
device->GetCopyableFootprints(&gamma_ramp_desc, 0, kQueueFrames * 2, 0,
gamma_ramp_footprints_, nullptr, nullptr,
&gamma_ramp_upload_size);
// Create the upload buffer for the gamma ramp.
@ -892,9 +898,6 @@ bool D3D12CommandProcessor::SetupContext() {
swap_texture_, &swap_srv_desc,
swap_texture_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart());
submission_open_ = false;
submission_frame_open_ = false;
pix_capture_requested_.store(false, std::memory_order_relaxed);
pix_capturing_ = false;
@ -967,18 +970,22 @@ void D3D12CommandProcessor::ShutdownContext() {
shared_memory_.reset();
deferred_command_list_.reset();
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
command_lists_[i].reset();
}
ClearCommandListCache();
frame_open_ = false;
frame_current_ = 1;
frame_completed_ = 0;
std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));
// First release the fence since it may reference the event.
ui::d3d12::util::ReleaseAndNull(fence_);
if (fence_completion_event_) {
CloseHandle(fence_completion_event_);
fence_completion_event_ = nullptr;
ui::d3d12::util::ReleaseAndNull(submission_fence_);
if (submission_fence_completion_event_) {
CloseHandle(submission_fence_completion_event_);
submission_fence_completion_event_ = nullptr;
}
fence_current_value_ = 1;
fence_completed_value_ = 0;
submission_open_ = false;
submission_current_ = 1;
submission_completed_ = 0;
CommandProcessor::ShutdownContext();
}
@ -988,7 +995,7 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
if (submission_open_) {
if (frame_open_) {
uint32_t float_constant_index =
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
if (float_constant_index >= 256) {
@ -1038,12 +1045,13 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
auto provider = GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
// Upload the new gamma ramps.
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
// Upload the new gamma ramps, using the upload buffer for the current frame
// (will close the frame after this anyway, so can't write multiple times per
// frame).
uint32_t gamma_ramp_frame = uint32_t(frame_current_ % kQueueFrames);
if (dirty_gamma_ramp_normal_) {
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
gamma_ramp_footprints_[command_list_index * 2];
gamma_ramp_footprints_[gamma_ramp_frame * 2];
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
for (uint32_t i = 0; i < 256; ++i) {
@ -1069,7 +1077,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
}
if (dirty_gamma_ramp_pwl_) {
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
gamma_ramp_footprints_[command_list_index * 2 + 1];
gamma_ramp_footprints_[gamma_ramp_frame * 2 + 1];
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
for (uint32_t i = 0; i < 128; ++i) {
@ -1102,7 +1110,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
TextureFormat frontbuffer_format;
if (texture_cache_->RequestSwapTexture(descriptor_cpu_start,
frontbuffer_format)) {
render_target_cache_->UnbindRenderTargets();
render_target_cache_->FlushAndUnbindRenderTargets();
// Create the gamma ramp texture descriptor.
// This is according to D3D::InitializePresentationParameters from a game
@ -1161,8 +1169,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
descriptor_gpu_start, &gamma_ramp_gpu_handle,
use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f),
*deferred_command_list_);
// Ending the current frame's command list anyway, so no need to unbind
// the render targets when using ROV.
// Ending the current frame anyway, so no need to reset the current render
// targets when using ROV.
PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
@ -1683,7 +1691,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
memexport_range.base_address_dwords << 2, memexport_range_size);
readback_buffer_offset += memexport_range_size;
}
EndSubmission(false);
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = 0;
@ -1713,13 +1720,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
void D3D12CommandProcessor::InitializeTrace() {
BeginSubmission(false);
bool any_submitted = false;
any_submitted |= shared_memory_->InitializeTraceSubmitDownloads();
if (any_submitted) {
EndSubmission(false);
bool any_downloads_submitted = false;
any_downloads_submitted |= shared_memory_->InitializeTraceSubmitDownloads();
if (!any_downloads_submitted || !EndSubmission(false)) {
return;
}
AwaitAllSubmissionsCompletion();
shared_memory_->InitializeTraceCompleteDownloads();
}
}
void D3D12CommandProcessor::FinalizeTrace() {}
@ -1746,7 +1753,7 @@ bool D3D12CommandProcessor::IssueCopy() {
deferred_command_list_->D3DCopyBufferRegion(
readback_buffer, 0, shared_memory_buffer, written_address,
written_length);
EndSubmission(false);
if (EndSubmission(false)) {
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = 0;
@ -1761,6 +1768,7 @@ bool D3D12CommandProcessor::IssueCopy() {
}
}
}
}
return true;
}
@ -1769,20 +1777,75 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
bool is_opening_frame = is_guest_command && !frame_open_;
if (submission_open_ && !is_opening_frame) {
return;
}
// Check the fence - needed for all kinds of submissions (to reclaim transient
// resources early) and specifically for frames (not to queue too many).
submission_completed_ = submission_fence_->GetCompletedValue();
if (is_opening_frame) {
// Await the availability of the current frame.
uint64_t frame_current_last_submission =
closed_frame_submissions_[frame_current_ % kQueueFrames];
if (frame_current_last_submission > submission_completed_) {
submission_fence_->SetEventOnCompletion(
frame_current_last_submission, submission_fence_completion_event_);
WaitForSingleObject(submission_fence_completion_event_, INFINITE);
submission_completed_ = submission_fence_->GetCompletedValue();
}
// Update the completed frame index, also obtaining the actual completed
// frame number (since the CPU may actually be less than 3 frames behind)
// before reclaiming resources tracked with the frame number.
frame_completed_ =
std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames;
for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
++frame) {
if (closed_frame_submissions_[frame % kQueueFrames] >
submission_completed_) {
break;
}
frame_completed_ = frame;
}
}
// Reclaim command lists.
while (command_list_submitted_first_) {
if (command_list_submitted_first_->last_usage_submission >
submission_completed_) {
break;
}
if (command_list_writable_last_) {
command_list_writable_last_->next = command_list_submitted_first_;
} else {
command_list_writable_first_ = command_list_submitted_first_;
}
command_list_writable_last_ = command_list_submitted_first_;
command_list_submitted_first_ = command_list_submitted_first_->next;
command_list_writable_last_->next = nullptr;
}
if (!command_list_submitted_first_) {
command_list_submitted_last_ = nullptr;
}
// Delete transient buffers marked for deletion.
auto erase_buffers_end = buffers_for_deletion_.begin();
while (erase_buffers_end != buffers_for_deletion_.end()) {
if (erase_buffers_end->last_usage_submission > submission_completed_) {
++erase_buffers_end;
break;
}
erase_buffers_end->buffer->Release();
++erase_buffers_end;
}
buffers_for_deletion_.erase(buffers_for_deletion_.begin(), erase_buffers_end);
if (!submission_open_) {
submission_open_ = true;
// Wait for a swap command list to become free.
// Command list 0 is used when fence_current_value_ is 1, 4, 7...
fence_completed_value_ = fence_->GetCompletedValue();
if (fence_completed_value_ + kQueuedFrames < fence_current_value_) {
fence_->SetEventOnCompletion(fence_current_value_ - kQueuedFrames,
fence_completion_event_);
WaitForSingleObject(fence_completion_event_, INFINITE);
fence_completed_value_ = fence_->GetCompletedValue();
}
// Start a new command list.
// Start a new deferred command list - will create the real one at the end
// of the submission.
deferred_command_list_->Reset();
// Reset cached state of the command list.
@ -1799,32 +1862,17 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
current_sampler_heap_ = nullptr;
primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
shared_memory_->BeginSubmission();
render_target_cache_->BeginSubmission();
primitive_converter_->BeginSubmission();
}
if (!submission_frame_open_) {
submission_frame_open_ = true;
if (is_opening_frame) {
frame_open_ = true;
// TODO(Triang3l): Move fence checking and command list releasing here.
// Cleanup resources after checking the fence.
auto erase_buffers_end = buffers_for_deletion_.begin();
while (erase_buffers_end != buffers_for_deletion_.end()) {
if (erase_buffers_end->last_usage_fence_value > fence_completed_value_) {
++erase_buffers_end;
break;
}
erase_buffers_end->buffer->Release();
++erase_buffers_end;
}
buffers_for_deletion_.erase(buffers_for_deletion_.begin(),
erase_buffers_end);
// Reset bindings that depend on the resources with lifetime tracked with
// the fence.
// Reset bindings that depend on the data stored in the pools.
std::memset(current_float_constant_map_vertex_, 0,
sizeof(current_float_constant_map_vertex_));
std::memset(current_float_constant_map_pixel_, 0,
@ -1841,9 +1889,11 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
samplers_written_vertex_ = false;
samplers_written_pixel_ = false;
constant_buffer_pool_->Reclaim(fence_completed_value_);
view_heap_pool_->Reclaim(fence_completed_value_);
sampler_heap_pool_->Reclaim(fence_completed_value_);
// Reclaim pool pages - no need to do this every small submission since some
// may be reused.
constant_buffer_pool_->Reclaim(frame_completed_);
view_heap_pool_->Reclaim(frame_completed_);
sampler_heap_pool_->Reclaim(frame_completed_);
pix_capturing_ =
pix_capture_requested_.exchange(false, std::memory_order_relaxed);
@ -1855,18 +1905,36 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
}
}
shared_memory_->BeginFrame();
texture_cache_->BeginFrame();
primitive_converter_->BeginFrame();
}
}
void D3D12CommandProcessor::EndSubmission(bool is_swap) {
bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
auto provider = GetD3D12Context()->GetD3D12Provider();
if (is_swap && submission_frame_open_) {
// Make sure there is a command list to submit to.
if (submission_open_ && !command_list_writable_first_) {
std::unique_ptr<ui::d3d12::CommandList> new_command_list =
ui::d3d12::CommandList::Create(provider->GetDevice(),
provider->GetDirectQueue(),
D3D12_COMMAND_LIST_TYPE_DIRECT);
if (!new_command_list) {
// Try to submit later. Completely dropping the submission is not
// permitted because resources would be left in an undefined state.
return false;
}
command_list_writable_first_ = new CommandList;
command_list_writable_first_->command_list = std::move(new_command_list);
command_list_writable_first_->last_usage_submission = 0;
command_list_writable_first_->next = nullptr;
command_list_writable_last_ = command_list_writable_first_;
}
bool is_closing_frame = is_swap && frame_open_;
if (is_closing_frame) {
texture_cache_->EndFrame();
}
@ -1875,27 +1943,37 @@ void D3D12CommandProcessor::EndSubmission(bool is_swap) {
pipeline_cache_->EndSubmission();
render_target_cache_->EndSubmission();
// Submit barriers now because resources with the queued barriers may be
// destroyed between frames.
SubmitBarriers();
// Submit the command list.
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
auto current_command_list = command_lists_[command_list_index].get();
auto current_command_list =
command_list_writable_first_->command_list.get();
current_command_list->BeginRecording();
deferred_command_list_->Execute(current_command_list->GetCommandList(),
current_command_list->GetCommandList1());
current_command_list->Execute();
command_list_writable_first_->last_usage_submission = submission_current_;
if (command_list_submitted_last_) {
command_list_submitted_last_->next = command_list_writable_first_;
} else {
command_list_submitted_first_ = command_list_writable_first_;
}
command_list_submitted_last_ = command_list_writable_first_;
command_list_writable_first_ = command_list_writable_first_->next;
command_list_submitted_last_->next = nullptr;
if (!command_list_writable_first_) {
command_list_writable_last_ = nullptr;
}
provider->GetDirectQueue()->Signal(fence_, fence_current_value_++);
provider->GetDirectQueue()->Signal(submission_fence_,
submission_current_++);
submission_open_ = false;
}
if (is_swap && submission_frame_open_) {
if (is_closing_frame) {
// Close the capture after submitting.
if (pix_capturing_) {
IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis();
@ -1904,12 +1982,17 @@ void D3D12CommandProcessor::EndSubmission(bool is_swap) {
}
pix_capturing_ = false;
}
submission_frame_open_ = false;
frame_open_ = false;
// Submission already closed now, so minus 1.
closed_frame_submissions_[(frame_current_++) % kQueueFrames] =
submission_current_ - 1;
if (cache_clear_requested_) {
cache_clear_requested_ = false;
AwaitAllSubmissionsCompletion();
ClearCommandListCache();
ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
scratch_buffer_size_ = 0;
@ -1934,18 +2017,35 @@ void D3D12CommandProcessor::EndSubmission(bool is_swap) {
// shared_memory_->ClearCache();
}
}
return true;
}
void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {
// May be called if shutting down without everything set up.
if ((fence_completed_value_ + 1) >= fence_current_value_ || !fence_ ||
GetD3D12Context()->WasLost()) {
if ((submission_completed_ + 1) >= submission_current_ ||
!submission_fence_ || GetD3D12Context()->WasLost()) {
return;
}
fence_->SetEventOnCompletion(fence_current_value_ - 1,
fence_completion_event_);
WaitForSingleObject(fence_completion_event_, INFINITE);
fence_completed_value_ = fence_current_value_ - 1;
submission_fence_->SetEventOnCompletion(submission_current_ - 1,
submission_fence_completion_event_);
WaitForSingleObject(submission_fence_completion_event_, INFINITE);
submission_completed_ = submission_current_ - 1;
}
void D3D12CommandProcessor::ClearCommandListCache() {
while (command_list_submitted_first_) {
auto next = command_list_submitted_first_->next;
delete command_list_submitted_first_;
command_list_submitted_first_ = next;
}
command_list_submitted_last_ = nullptr;
while (command_list_writable_first_) {
auto next = command_list_writable_first_->next;
delete command_list_writable_first_;
command_list_writable_first_ = next;
}
command_list_writable_last_ = nullptr;
}
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
@ -2819,9 +2919,8 @@ bool D3D12CommandProcessor::UpdateBindings(
// Update constant buffers.
if (!cbuffer_bindings_system_.up_to_date) {
uint8_t* system_constants = constant_buffer_pool_->Request(
fence_current_value_,
xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr,
&cbuffer_bindings_system_.buffer_address);
frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u),
nullptr, nullptr, &cbuffer_bindings_system_.buffer_address);
if (system_constants == nullptr) {
return false;
}
@ -2832,7 +2931,7 @@ bool D3D12CommandProcessor::UpdateBindings(
}
if (!cbuffer_bindings_float_vertex_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->Request(
fence_current_value_, float_constant_size_vertex, nullptr, nullptr,
frame_current_, float_constant_size_vertex, nullptr, nullptr,
&cbuffer_bindings_float_vertex_.buffer_address);
if (float_constants == nullptr) {
return false;
@ -2857,7 +2956,7 @@ bool D3D12CommandProcessor::UpdateBindings(
}
if (!cbuffer_bindings_float_pixel_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->Request(
fence_current_value_, float_constant_size_pixel, nullptr, nullptr,
frame_current_, float_constant_size_pixel, nullptr, nullptr,
&cbuffer_bindings_float_pixel_.buffer_address);
if (float_constants == nullptr) {
return false;
@ -2887,7 +2986,7 @@ bool D3D12CommandProcessor::UpdateBindings(
if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint32_t* bool_loop_constants =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
fence_current_value_, 768, nullptr, nullptr,
frame_current_, 768, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address));
if (bool_loop_constants == nullptr) {
return false;
@ -2906,8 +3005,8 @@ bool D3D12CommandProcessor::UpdateBindings(
write_bool_loop_constant_view = true;
}
if (!cbuffer_bindings_fetch_.up_to_date) {
uint8_t* fetch_constants = constant_buffer_pool_->Request(
fence_current_value_, 768, nullptr, nullptr,
uint8_t* fetch_constants =
constant_buffer_pool_->Request(frame_current_, 768, nullptr, nullptr,
&cbuffer_bindings_fetch_.buffer_address);
if (fetch_constants == nullptr) {
return false;
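The command list cache added in this file uses the same writable/submitted split as the upload buffer and descriptor heap pools, instead of a fixed ring of kQueuedFrames lists. A simplified, self-contained model of the two singly linked lists (an int id stands in for ui::d3d12::CommandList; illustrative names, not the real code):

#include <cstdint>

struct CommandListNode {
  int id;  // Stand-in for std::unique_ptr<ui::d3d12::CommandList>.
  uint64_t last_usage_submission = 0;
  CommandListNode* next = nullptr;
};

struct CommandListCache {
  CommandListNode* writable_first = nullptr;
  CommandListNode* writable_last = nullptr;
  CommandListNode* submitted_first = nullptr;
  CommandListNode* submitted_last = nullptr;

  // BeginSubmission: every submitted node whose submission the GPU has passed
  // goes back to the tail of the writable list, preserving order.
  void Reclaim(uint64_t submission_completed) {
    while (submitted_first &&
           submitted_first->last_usage_submission <= submission_completed) {
      CommandListNode* node = submitted_first;
      submitted_first = node->next;
      if (writable_last) {
        writable_last->next = node;
      } else {
        writable_first = node;
      }
      writable_last = node;
      node->next = nullptr;
    }
    if (!submitted_first) {
      submitted_last = nullptr;
    }
  }

  // EndSubmission: the head writable node (EndSubmission creates one if the
  // writable list is empty) records the current submission and moves to the
  // tail of the submitted list.
  void Retire(uint64_t submission_current) {
    CommandListNode* node = writable_first;
    writable_first = node->next;
    if (!writable_first) {
      writable_last = nullptr;
    }
    node->last_usage_submission = submission_current;
    node->next = nullptr;
    if (submitted_last) {
      submitted_last->next = node;
    } else {
      submitted_first = node;
    }
    submitted_last = node;
  }
};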

View File

@ -53,7 +53,8 @@ class D3D12CommandProcessor : public CommandProcessor {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
}
// Returns the deferred drawing command list for the currently open frame.
// Returns the deferred drawing command list for the currently open
// submission.
DeferredCommandList* GetDeferredCommandList() {
return deferred_command_list_.get();
}
@ -63,8 +64,11 @@ class D3D12CommandProcessor : public CommandProcessor {
// targets.
bool IsROVUsedForEDRAM() const;
uint64_t GetCurrentFenceValue() const { return fence_current_value_; }
uint64_t GetCompletedFenceValue() const { return fence_completed_value_; }
uint64_t GetCurrentSubmission() const { return submission_current_; }
uint64_t GetCompletedSubmission() const { return submission_completed_; }
uint64_t GetCurrentFrame() const { return frame_current_; }
uint64_t GetCompletedFrame() const { return frame_completed_; }
// Gets the current color write mask, taking the pixel shader's write mask
// into account. If a shader doesn't write to a render target, it shouldn't be
@ -106,8 +110,8 @@ class D3D12CommandProcessor : public CommandProcessor {
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
// Returns a single temporary GPU-side buffer within a frame for tasks like
// texture untiling and resolving.
// Returns a single temporary GPU-side buffer within a submission for tasks
// like texture untiling and resolving.
ID3D12Resource* RequestScratchGPUBuffer(uint32_t size,
D3D12_RESOURCE_STATES state);
// This must be called when done with the scratch buffer, to notify the
@ -127,22 +131,23 @@ class D3D12CommandProcessor : public CommandProcessor {
}
// Sets the current pipeline state to a compute pipeline. This is for cache
// invalidation primarily. A frame must be open.
// invalidation primarily. A submission must be open.
void SetComputePipeline(ID3D12PipelineState* pipeline);
// Stores and unbinds render targets before binding changing render targets
// externally. This is separate from SetExternalGraphicsPipeline because it
// causes computations to be dispatched, and the scratch buffer may also be
// used.
void UnbindRenderTargets();
void FlushAndUnbindRenderTargets();
// Sets the current pipeline state to a special drawing pipeline, invalidating
// various cached state variables. UnbindRenderTargets may be needed before
// calling this. A frame must be open.
void SetExternalGraphicsPipeline(ID3D12PipelineState* pipeline,
bool reset_viewport = true,
bool reset_blend_factor = false,
bool reset_stencil_ref = false);
// various cached state variables. FlushAndUnbindRenderTargets may be needed
// before calling this. A submission must be open.
void SetExternalGraphicsPipeline(
ID3D12PipelineState* pipeline,
bool changing_rts_and_sample_positions = true,
bool changing_viewport = true, bool changing_blend_factor = false,
bool changing_stencil_ref = false);
// Returns the text to display in the GPU backend name in the window title.
std::wstring GetWindowTitleText() const;
@ -170,7 +175,7 @@ class D3D12CommandProcessor : public CommandProcessor {
void FinalizeTrace() override;
private:
static constexpr uint32_t kQueuedFrames = 3;
static constexpr uint32_t kQueueFrames = 3;
enum RootParameter : UINT {
// These are always present.
@ -226,9 +231,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// opposed to simply resuming after mid-frame synchronization).
void BeginSubmission(bool is_guest_command);
// If is_swap is true, a full frame is closed - with, if needed, cache
// clearing and stopping capturing.
void EndSubmission(bool is_swap);
// clearing and stopping capturing. Returns whether the submission was done
// successfully; if it has failed, the submission is left open.
bool EndSubmission(bool is_swap);
void AwaitAllSubmissionsCompletion();
// Need to await submission completion before calling.
void ClearCommandListCache();
void UpdateFixedFunctionState(bool primitive_two_faced);
void UpdateSystemConstantValues(
@ -253,12 +261,30 @@ class D3D12CommandProcessor : public CommandProcessor {
bool cache_clear_requested_ = false;
uint64_t fence_current_value_ = 1;
uint64_t fence_completed_value_ = 0;
HANDLE fence_completion_event_ = nullptr;
ID3D12Fence* fence_ = nullptr;
bool submission_open_ = false;
// Values of submission_fence_.
uint64_t submission_current_ = 1;
uint64_t submission_completed_ = 0;
HANDLE submission_fence_completion_event_ = nullptr;
ID3D12Fence* submission_fence_ = nullptr;
std::unique_ptr<ui::d3d12::CommandList> command_lists_[kQueuedFrames] = {};
bool frame_open_ = false;
// Guest frame index, since some transient resources can be reused across
// submissions. Values updated in the beginning of a frame.
uint64_t frame_current_ = 1;
uint64_t frame_completed_ = 0;
// Submission indices of frames that have already been submitted.
uint64_t closed_frame_submissions_[kQueueFrames] = {};
struct CommandList {
std::unique_ptr<ui::d3d12::CommandList> command_list;
uint64_t last_usage_submission;
CommandList* next;
};
CommandList* command_list_writable_first_ = nullptr;
CommandList* command_list_writable_last_ = nullptr;
CommandList* command_list_submitted_first_ = nullptr;
CommandList* command_list_submitted_last_ = nullptr;
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
@ -283,10 +309,10 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12Resource* gamma_ramp_texture_ = nullptr;
D3D12_RESOURCE_STATES gamma_ramp_texture_state_;
// Upload buffer for an image that is the same as gamma_ramp_, but with
// kQueuedFrames array layers.
// kQueueFrames array layers.
ID3D12Resource* gamma_ramp_upload_ = nullptr;
uint8_t* gamma_ramp_upload_mapping_ = nullptr;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_ramp_footprints_[kQueuedFrames * 2];
D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_ramp_footprints_[kQueueFrames * 2];
static constexpr uint32_t kSwapTextureWidth = 1280;
static constexpr uint32_t kSwapTextureHeight = 720;
@ -308,7 +334,7 @@ class D3D12CommandProcessor : public CommandProcessor {
struct BufferForDeletion {
ID3D12Resource* buffer;
uint64_t last_usage_fence_value;
uint64_t last_usage_submission;
};
std::deque<BufferForDeletion> buffers_for_deletion_;
@ -322,9 +348,6 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12Resource* readback_buffer_ = nullptr;
uint32_t readback_buffer_size_ = 0;
bool submission_open_ = false;
bool submission_frame_open_ = false;
std::atomic<bool> pix_capture_requested_ = false;
bool pix_capturing_;
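The BufferForDeletion queue in this header is the matching deferred-release side: a replaced buffer is queued with the submission that may still reference it and released once the GPU has passed that submission. A self-contained sketch of the pattern (stand-in Resource type, illustrative function names):

#include <cstdint>
#include <deque>

struct Resource {};  // Stand-in for ID3D12Resource.

struct BufferForDeletion {
  Resource* buffer;
  uint64_t last_usage_submission;
};

std::deque<BufferForDeletion> buffers_for_deletion;

// Called when a buffer is replaced while the current submission may still be
// using it on the GPU timeline.
void DeferRelease(Resource* buffer, uint64_t submission_current) {
  buffers_for_deletion.push_back({buffer, submission_current});
}

// Called from BeginSubmission after reading the completed submission index.
void ReleaseCompleted(uint64_t submission_completed) {
  while (!buffers_for_deletion.empty() &&
         buffers_for_deletion.front().last_usage_submission <=
             submission_completed) {
    delete buffers_for_deletion.front().buffer;  // Release() in the real code.
    buffers_for_deletion.pop_front();
  }
}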

View File

@ -112,7 +112,7 @@ bool PrimitiveConverter::Initialize() {
}
static_ib_upload_->Unmap(0, nullptr);
// Not uploaded yet.
static_ib_upload_fence_value_ = UINT64_MAX;
static_ib_upload_submission_ = UINT64_MAX;
if (FAILED(device->CreateCommittedResource(
&ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
&static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
@ -145,29 +145,26 @@ void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
void PrimitiveConverter::BeginSubmission() {
// Got a command list now - upload and transition the static index buffer if
// needed.
if (static_ib_upload_ && static_ib_upload_fence_value_ == UINT64_MAX) {
if (static_ib_upload_) {
if (static_ib_upload_submission_ == UINT64_MAX) {
// Not uploaded yet - upload.
command_processor_->GetDeferredCommandList()->D3DCopyResource(
static_ib_, static_ib_upload_);
command_processor_->PushTransitionBarrier(
static_ib_, D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_INDEX_BUFFER);
static_ib_upload_fence_value_ = command_processor_->GetCurrentFenceValue();
}
}
void PrimitiveConverter::BeginFrame() {
uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue();
if (static_ib_upload_ && static_ib_upload_fence_value_ != UINT64_MAX &&
completed_fence_value >= static_ib_upload_fence_value_) {
static_ib_upload_submission_ = command_processor_->GetCurrentSubmission();
} else if (command_processor_->GetCompletedSubmission() >=
static_ib_upload_submission_) {
// Completely uploaded - release the upload buffer.
static_ib_upload_->Release();
static_ib_upload_ = nullptr;
}
}
}
buffer_pool_->Reclaim(command_processor_->GetCompletedFenceValue());
void PrimitiveConverter::BeginFrame() {
buffer_pool_->Reclaim(command_processor_->GetCompletedFrame());
converted_indices_cache_.clear();
memory_regions_used_ = 0;
}
@ -698,7 +695,7 @@ void* PrimitiveConverter::AllocateIndices(
}
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
uint8_t* mapping =
buffer_pool_->Request(command_processor_->GetCurrentFenceValue(), size,
buffer_pool_->Request(command_processor_->GetCurrentFrame(), size,
nullptr, nullptr, &gpu_address);
if (mapping == nullptr) {
XELOGE("Failed to allocate space for %u converted %u-bit vertex indices",
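The static index buffer upload above is the simplest consumer of the new counters: copy once on some submission, remember its index, and drop the staging resource once the GPU has passed that submission. A reduced, self-contained sketch (UINT64_MAX as the "not yet uploaded" sentinel, as in the diff; stand-in type, illustrative names):

#include <cstdint>

struct StagingBuffer {};  // Stand-in for the ID3D12Resource upload heap.

StagingBuffer* static_ib_upload = new StagingBuffer();
uint64_t static_ib_upload_submission = UINT64_MAX;  // Not uploaded yet.

// Called at the start of every submission, once commands can be recorded.
void BeginSubmission(uint64_t submission_current,
                     uint64_t submission_completed) {
  if (!static_ib_upload) {
    return;  // Already uploaded and released.
  }
  if (static_ib_upload_submission == UINT64_MAX) {
    // First opportunity to record commands: issue the copy and the transition
    // barrier, and remember which submission carries them.
    static_ib_upload_submission = submission_current;
  } else if (submission_completed >= static_ib_upload_submission) {
    // The copy has executed on the GPU - the staging buffer can go.
    delete static_ib_upload;  // Release() in the real code.
    static_ib_upload = nullptr;
  }
}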

View File

@ -112,7 +112,7 @@ class PrimitiveConverter {
// CPU-side, used only for uploading - destroyed once the copy commands have
// been completed.
ID3D12Resource* static_ib_upload_ = nullptr;
uint64_t static_ib_upload_fence_value_;
uint64_t static_ib_upload_submission_;
// GPU-side - used for drawing.
ID3D12Resource* static_ib_ = nullptr;
D3D12_GPU_VIRTUAL_ADDRESS static_ib_gpu_address_;

View File

@ -452,15 +452,22 @@ void RenderTargetCache::ClearCache() {
}
void RenderTargetCache::BeginSubmission() {
// A submission does not always end in a resolve (for example, when memexport
// readback happens) or something else that would surely submit the UAV
// barrier, so we need to preserve the `current_` variables.
if (!command_processor_->IsROVUsedForEDRAM()) {
ClearBindings();
}
// With the ROV, a submission does not always end in a resolve (for example,
// when memexport readback happens) or something else that would surely submit
// the UAV barrier, so we need to preserve the `current_` variables.
//
// With RTVs, simply going to a different command list doesn't have to cause
// storing the render targets to the EDRAM buffer; however, the new command
// list doesn't have the needed RTVs/DSV bound yet.
//
// Just make sure they are bound to the new command list.
ForceApplyOnNextUpdate();
}
void RenderTargetCache::EndSubmission() { UnbindRenderTargets(); }
void RenderTargetCache::EndFrame() {
// May be clearing the cache after this.
FlushAndUnbindRenderTargets();
}
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// There are two kinds of render target binding updates in this implementation
@ -754,6 +761,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
}
}
bool sample_positions_set = false;
// Need to change the bindings.
if (full_update || render_targets_to_attach) {
#if 0
@ -895,7 +904,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
if (!rov_used) {
// Sample positions when loading depth must match sample positions when
// drawing.
command_processor_->SetSamplePositions(rb_surface_info.msaa_samples);
command_processor_->SetSamplePositions(current_msaa_samples_);
sample_positions_set = true;
// Load the contents of the new render targets from the EDRAM buffer (will
// change the state of the render targets to copy destination).
@ -921,13 +931,16 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Transition the render targets to the appropriate state if needed,
// compress the list of the render target because null RTV descriptors are
// broken in Direct3D 12 and bind the render targets to the command list.
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
// broken in Direct3D 12 and update the list of the render targets to bind
// to the command list.
uint32_t rtv_count = 0;
for (uint32_t i = 0; i < 4; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
if (!binding.is_bound) {
continue;
}
RenderTarget* render_target = binding.render_target;
if (!binding.is_bound || render_target == nullptr) {
if (render_target == nullptr) {
continue;
}
XELOGGPU("RT Color %u: base %u, format %u", i, edram_bases[i],
@ -936,7 +949,6 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
render_target->resource, render_target->state,
D3D12_RESOURCE_STATE_RENDER_TARGET);
render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
rtv_handles[rtv_count] = render_target->handle;
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
current_pipeline_render_targets_[rtv_count].format =
GetColorDXGIFormat(ColorRenderTargetFormat(formats[i]));
@ -946,7 +958,6 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
current_pipeline_render_targets_[i].guest_render_target = i;
current_pipeline_render_targets_[i].format = DXGI_FORMAT_UNKNOWN;
}
const D3D12_CPU_DESCRIPTOR_HANDLE* dsv_handle;
const RenderTargetBinding& depth_binding = current_bindings_[4];
RenderTarget* depth_render_target = depth_binding.render_target;
current_pipeline_render_targets_[4].guest_render_target = 4;
@ -956,18 +967,43 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
depth_render_target->resource, depth_render_target->state,
D3D12_RESOURCE_STATE_DEPTH_WRITE);
depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
dsv_handle = &depth_binding.render_target->handle;
current_pipeline_render_targets_[4].format =
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
} else {
dsv_handle = nullptr;
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
}
command_processor_->SubmitBarriers();
apply_to_command_list_ = true;
}
}
// Bind the render targets to the command list, either in case of an update or
// if asked to externally.
if (!rov_used && apply_to_command_list_) {
apply_to_command_list_ = false;
if (!sample_positions_set) {
command_processor_->SetSamplePositions(current_msaa_samples_);
}
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
uint32_t rtv_count;
for (rtv_count = 0; rtv_count < 4; ++rtv_count) {
const PipelineRenderTarget& pipeline_render_target =
current_pipeline_render_targets_[rtv_count];
if (pipeline_render_target.format == DXGI_FORMAT_UNKNOWN) {
break;
}
const RenderTargetBinding& binding =
current_bindings_[pipeline_render_target.guest_render_target];
rtv_handles[rtv_count] = binding.render_target->handle;
}
const RenderTargetBinding& depth_binding = current_bindings_[4];
const D3D12_CPU_DESCRIPTOR_HANDLE* dsv_handle =
current_pipeline_render_targets_[4].format != DXGI_FORMAT_UNKNOWN
? &depth_binding.render_target->handle
: nullptr;
command_processor_->GetDeferredCommandList()->D3DOMSetRenderTargets(
rtv_count, rtv_handles, FALSE, dsv_handle);
}
}
// Update the dirty regions.
for (uint32_t i = 0; i < 5; ++i) {
@ -2098,7 +2134,7 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
return resolve_target;
}
void RenderTargetCache::UnbindRenderTargets() {
void RenderTargetCache::FlushAndUnbindRenderTargets() {
if (command_processor_->IsROVUsedForEDRAM()) {
return;
}
@ -2217,6 +2253,7 @@ void RenderTargetCache::ClearBindings() {
current_msaa_samples_ = MsaaSamples::k1X;
current_edram_max_rows_ = 0;
std::memset(current_bindings_, 0, sizeof(current_bindings_));
apply_to_command_list_ = true;
}
#if 0

View File

@ -256,7 +256,7 @@ class RenderTargetCache {
void ClearCache();
void BeginSubmission();
void EndSubmission();
void EndFrame();
// Called in the beginning of a draw call - may bind pipelines.
bool UpdateRenderTargets(const D3D12Shader* pixel_shader);
// Returns the host-to-guest mappings and host formats of currently bound
@ -272,10 +272,13 @@ class RenderTargetCache {
bool Resolve(SharedMemory* shared_memory, TextureCache* texture_cache,
Memory* memory, uint32_t& written_address_out,
uint32_t& written_length_out);
// Makes sure the render targets are re-attached to the command list for which
// the next update will take place.
void ForceApplyOnNextUpdate() { apply_to_command_list_ = true; }
// Flushes the render targets to EDRAM and unbinds them, for instance, when
// the command processor takes over framebuffer bindings to draw something
// special.
void UnbindRenderTargets();
void FlushAndUnbindRenderTargets();
void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
// Totally necessary to rely on the base format - Too Human switches between
@ -636,6 +639,7 @@ class RenderTargetCache {
// current_edram_max_rows_ is for RTV/DSV only (render target texture size).
uint32_t current_edram_max_rows_ = 0;
RenderTargetBinding current_bindings_[5] = {};
bool apply_to_command_list_ = true;
PipelineRenderTarget current_pipeline_render_targets_[5];
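Since output-merger bindings do not survive a command list reset, the cache now separates "the guest changed the render target configuration" from "the same configuration must be re-issued on a new command list"; apply_to_command_list_, set by BeginSubmission and ForceApplyOnNextUpdate, only re-records the sample positions and OMSetRenderTargets on the non-ROV path. A compressed, self-contained sketch of that control flow, with the D3D12 calls reduced to stubs:

#include <cstdio>

// Stubs standing in for the deferred command list and sample-position calls.
void SetSamplePositions() { std::puts("SetSamplePositions"); }
void OMSetRenderTargets() { std::puts("OMSetRenderTargets"); }

// True right after BeginSubmission or ForceApplyOnNextUpdate.
bool apply_to_command_list = true;

void UpdateRenderTargets(bool bindings_changed) {
  bool sample_positions_set = false;
  if (bindings_changed) {
    // Load and transition the new render targets here...
    SetSamplePositions();
    sample_positions_set = true;
    apply_to_command_list = true;
  }
  if (apply_to_command_list) {
    apply_to_command_list = false;
    if (!sample_positions_set) {
      SetSamplePositions();
    }
    // Re-bind even if the guest configuration did not change.
    OMSetRenderTargets();
  }
}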

View File

@ -165,8 +165,8 @@ void SharedMemory::Shutdown() {
}
}
void SharedMemory::BeginFrame() {
upload_buffer_pool_->Reclaim(command_processor_->GetCompletedFenceValue());
void SharedMemory::BeginSubmission() {
upload_buffer_pool_->Reclaim(command_processor_->GetCompletedSubmission());
}
SharedMemory::GlobalWatchHandle SharedMemory::RegisterGlobalWatch(
@ -367,7 +367,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
ID3D12Resource* upload_buffer;
uint32_t upload_buffer_offset, upload_buffer_size;
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
command_processor_->GetCurrentFenceValue(),
command_processor_->GetCurrentSubmission(),
upload_range_length << page_size_log2_, &upload_buffer,
&upload_buffer_offset, &upload_buffer_size, nullptr);
if (upload_buffer_mapping == nullptr) {

View File

@ -44,7 +44,7 @@ class SharedMemory {
return buffer_gpu_address_;
}
void BeginFrame();
void BeginSubmission();
typedef void (*GlobalWatchCallback)(void* context, uint32_t address_first,
uint32_t address_last,

View File

@ -1169,7 +1169,7 @@ void TextureCache::BeginFrame() {
texture_current_usage_time_ = xe::Clock::QueryHostUptimeMillis();
// If memory usage is too high, destroy unused textures.
uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue();
uint64_t completed_frame = command_processor_->GetCompletedFrame();
uint32_t limit_soft_mb = cvars::d3d12_texture_cache_limit_soft;
uint32_t limit_hard_mb = cvars::d3d12_texture_cache_limit_hard;
if (IsResolutionScale2X()) {
@ -1186,7 +1186,7 @@ void TextureCache::BeginFrame() {
break;
}
Texture* texture = texture_used_first_;
if (texture->last_usage_fence_value > completed_fence_value) {
if (texture->last_usage_frame > completed_frame) {
break;
}
if (!limit_hard_exceeded &&
@ -2311,7 +2311,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
texture->resource_size =
device->GetResourceAllocationInfo(0, 1, &desc).SizeInBytes;
texture->state = state;
texture->last_usage_fence_value = command_processor_->GetCurrentFenceValue();
texture->last_usage_frame = command_processor_->GetCurrentFrame();
texture->last_usage_time = texture_current_usage_time_;
texture->used_previous = texture_used_last_;
texture->used_next = nullptr;
@ -2606,7 +2606,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
uint8_t* cbuffer_mapping = cbuffer_pool->Request(
command_processor_->GetCurrentFenceValue(),
command_processor_->GetCurrentFrame(),
xe::align(uint32_t(sizeof(load_constants)), 256u), nullptr, nullptr,
&cbuffer_gpu_address);
if (cbuffer_mapping == nullptr) {
@ -2684,10 +2684,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
void TextureCache::MarkTextureUsed(Texture* texture) {
uint64_t current_fence_value = command_processor_->GetCurrentFenceValue();
uint64_t current_frame = command_processor_->GetCurrentFrame();
// This is called very frequently, don't relink unless needed for caching.
if (texture->last_usage_fence_value != current_fence_value) {
texture->last_usage_fence_value = current_fence_value;
if (texture->last_usage_frame != current_frame) {
texture->last_usage_frame = current_frame;
texture->last_usage_time = texture_current_usage_time_;
if (texture->used_next == nullptr) {
// Simplify the code a bit - already in the end of the list.
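The texture cache now ages textures by guest frame rather than fence value: each use stamps the texture with the current frame and moves it to the tail of a usage list, and BeginFrame evicts from the head only while over the memory limit and while the head texture's last use is in a completed frame. A self-contained sketch of that policy (std::list instead of the intrusive used_previous/used_next links; simplified names):

#include <cstdint>
#include <list>

struct TextureModel {
  uint64_t last_usage_frame = 0;
  uint64_t resource_size = 0;
  std::list<TextureModel*>::iterator usage_it;  // Position in the usage list.
};

std::list<TextureModel*> textures_by_usage;  // Head = least recently used.
uint64_t textures_total_size = 0;

void MarkTextureUsed(TextureModel* texture, uint64_t frame_current) {
  // Called very frequently - only relink when the frame number changes.
  if (texture->last_usage_frame != frame_current) {
    texture->last_usage_frame = frame_current;
    textures_by_usage.splice(textures_by_usage.end(), textures_by_usage,
                             texture->usage_it);
  }
}

// BeginFrame: destroy old textures while memory pressure demands it, but never
// ones whose last use is in a frame the GPU may still be rendering.
void EvictUnusedTextures(uint64_t frame_completed, uint64_t limit_bytes) {
  while (!textures_by_usage.empty() && textures_total_size > limit_bytes) {
    TextureModel* texture = textures_by_usage.front();
    if (texture->last_usage_frame > frame_completed) {
      break;
    }
    textures_total_size -= texture->resource_size;
    textures_by_usage.pop_front();
    delete texture;
  }
}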

View File

@ -343,7 +343,7 @@ class TextureCache {
uint64_t resource_size;
D3D12_RESOURCE_STATES state;
uint64_t last_usage_fence_value;
uint64_t last_usage_frame;
uint64_t last_usage_time;
Texture* used_previous;
Texture* used_next;

View File

@ -24,9 +24,9 @@ UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size)
UploadBufferPool::~UploadBufferPool() { ClearCache(); }
void UploadBufferPool::Reclaim(uint64_t completed_fence_value) {
void UploadBufferPool::Reclaim(uint64_t completed_submission_index) {
while (submitted_first_) {
if (submitted_first_->last_usage_fence_value > completed_fence_value) {
if (submitted_first_->last_submission_index > completed_submission_index) {
break;
}
if (writable_last_) {
@ -67,7 +67,7 @@ void UploadBufferPool::ClearCache() {
writable_last_ = nullptr;
}
uint8_t* UploadBufferPool::Request(uint64_t usage_fence_value, uint32_t size,
uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size,
ID3D12Resource** buffer_out,
uint32_t* offset_out,
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
@ -76,9 +76,9 @@ uint8_t* UploadBufferPool::Request(uint64_t usage_fence_value, uint32_t size,
return nullptr;
}
assert_true(!current_page_used_ ||
usage_fence_value >= writable_first_->last_usage_fence_value);
submission_index >= writable_first_->last_submission_index);
assert_true(!submitted_last_ ||
usage_fence_value >= submitted_last_->last_usage_fence_value);
submission_index >= submitted_last_->last_submission_index);
if (page_size_ - current_page_used_ < size || !writable_first_) {
// Start a new page if can't fit all the bytes or don't have an open page.
if (writable_first_) {
@ -122,13 +122,13 @@ uint8_t* UploadBufferPool::Request(uint64_t usage_fence_value, uint32_t size,
writable_first_->buffer = new_buffer;
writable_first_->gpu_address = new_buffer->GetGPUVirtualAddress();
writable_first_->mapping = new_buffer_mapping;
writable_first_->last_usage_fence_value = usage_fence_value;
writable_first_->last_submission_index = submission_index;
writable_first_->next = nullptr;
writable_last_ = writable_first_;
}
current_page_used_ = 0;
}
writable_first_->last_usage_fence_value = usage_fence_value;
writable_first_->last_submission_index = submission_index;
if (buffer_out) {
*buffer_out = writable_first_->buffer;
}
@ -145,7 +145,7 @@ uint8_t* UploadBufferPool::Request(uint64_t usage_fence_value, uint32_t size,
}
uint8_t* UploadBufferPool::RequestPartial(
uint64_t usage_fence_value, uint32_t size, ID3D12Resource** buffer_out,
uint64_t submission_index, uint32_t size, ID3D12Resource** buffer_out,
uint32_t* offset_out, uint32_t* size_out,
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
size = std::min(size, page_size_);
@ -153,7 +153,7 @@ uint8_t* UploadBufferPool::RequestPartial(
size = std::min(size, page_size_ - current_page_used_);
}
uint8_t* mapping =
Request(usage_fence_value, size, buffer_out, offset_out, gpu_address_out);
Request(submission_index, size, buffer_out, offset_out, gpu_address_out);
if (!mapping) {
return nullptr;
}
@ -172,9 +172,9 @@ DescriptorHeapPool::DescriptorHeapPool(ID3D12Device* device,
DescriptorHeapPool::~DescriptorHeapPool() { ClearCache(); }
void DescriptorHeapPool::Reclaim(uint64_t completed_fence_value) {
void DescriptorHeapPool::Reclaim(uint64_t completed_submission_index) {
while (submitted_first_) {
if (submitted_first_->last_usage_fence_value > completed_fence_value) {
if (submitted_first_->last_submission_index > completed_submission_index) {
break;
}
if (writable_last_) {
@ -213,7 +213,7 @@ void DescriptorHeapPool::ClearCache() {
writable_last_ = nullptr;
}
uint64_t DescriptorHeapPool::Request(uint64_t usage_fence_value,
uint64_t DescriptorHeapPool::Request(uint64_t submission_index,
uint64_t previous_heap_index,
uint32_t count_for_partial_update,
uint32_t count_for_full_update,
@ -225,9 +225,9 @@ uint64_t DescriptorHeapPool::Request(uint64_t usage_fence_value,
return kHeapIndexInvalid;
}
assert_true(!current_page_used_ ||
usage_fence_value >= writable_first_->last_usage_fence_value);
submission_index >= writable_first_->last_submission_index);
assert_true(!submitted_last_ ||
usage_fence_value >= submitted_last_->last_usage_fence_value);
submission_index >= submitted_last_->last_submission_index);
// If the last full update happened on the current page, a partial update is
// possible.
uint32_t count = previous_heap_index == current_heap_index_
@ -271,11 +271,11 @@ uint64_t DescriptorHeapPool::Request(uint64_t usage_fence_value,
writable_first_->heap = new_heap;
writable_first_->cpu_start = new_heap->GetCPUDescriptorHandleForHeapStart();
writable_first_->gpu_start = new_heap->GetGPUDescriptorHandleForHeapStart();
writable_first_->last_usage_fence_value = usage_fence_value;
writable_first_->last_submission_index = submission_index;
writable_first_->next = nullptr;
writable_last_ = writable_first_;
}
writable_first_->last_usage_fence_value = usage_fence_value;
writable_first_->last_submission_index = submission_index;
index_out = current_page_used_;
current_page_used_ += count;
return current_heap_index_;

View File

@ -18,21 +18,24 @@ namespace xe {
namespace ui {
namespace d3d12 {
// Submission index is the fence value or a value derived from it (if reclaiming
// less often than once per fence value, for instance).
class UploadBufferPool {
public:
UploadBufferPool(ID3D12Device* device, uint32_t page_size);
~UploadBufferPool();
void Reclaim(uint64_t completed_fence_value);
void Reclaim(uint64_t completed_submission_index);
void ClearCache();
// Request to write data in a single piece, creating a new page if the current
// one doesn't have enough free space.
uint8_t* Request(uint64_t usage_fence_value, uint32_t size,
uint8_t* Request(uint64_t submission_index, uint32_t size,
ID3D12Resource** buffer_out, uint32_t* offset_out,
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
// Request to write data in multiple parts, filling the buffer entirely.
uint8_t* RequestPartial(uint64_t usage_fence_value, uint32_t size,
uint8_t* RequestPartial(uint64_t submission_index, uint32_t size,
ID3D12Resource** buffer_out, uint32_t* offset_out,
uint32_t* size_out,
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
@ -45,7 +48,7 @@ class UploadBufferPool {
ID3D12Resource* buffer;
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
void* mapping;
uint64_t last_usage_fence_value;
uint64_t last_submission_index;
Page* next;
};
@ -68,7 +71,7 @@ class DescriptorHeapPool {
uint32_t page_size);
~DescriptorHeapPool();
void Reclaim(uint64_t completed_fence_value);
void Reclaim(uint64_t completed_submission_index);
void ClearCache();
// Because all descriptors for a single draw call must be in the same heap,
@ -96,7 +99,7 @@ class DescriptorHeapPool {
// This MUST be called even if there's nothing to write in a partial update
// (with count_for_partial_update being 0), because a full update may still be
// required.
uint64_t Request(uint64_t usage_fence_value, uint64_t previous_heap_index,
uint64_t Request(uint64_t submission_index, uint64_t previous_heap_index,
uint32_t count_for_partial_update,
uint32_t count_for_full_update, uint32_t& index_out);
@ -122,7 +125,7 @@ class DescriptorHeapPool {
ID3D12DescriptorHeap* heap;
D3D12_CPU_DESCRIPTOR_HANDLE cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_start;
uint64_t last_usage_fence_value;
uint64_t last_submission_index;
Page* next;
};
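Finally, the DescriptorHeapPool contract referenced above (Request must be called even when count_for_partial_update is 0, because a heap switch may force a full update) can be reduced to a few lines. This is an illustrative, self-contained model, not the real class: the caller keeps the heap index returned by its last full update and passes it back as previous_heap_index; if the returned index differs, all count_for_full_update descriptors must be rewritten, otherwise only the new ones.

#include <cstdint>

constexpr uint64_t kHeapIndexInvalid = UINT64_MAX;

struct DescriptorHeapPoolModel {
  uint32_t page_size;
  uint64_t current_heap_index = 0;
  uint32_t current_page_used = 0;

  uint64_t Request(uint64_t previous_heap_index,
                   uint32_t count_for_partial_update,
                   uint32_t count_for_full_update, uint32_t& index_out) {
    if (count_for_full_update > page_size) {
      return kHeapIndexInvalid;  // Can never fit in a single heap.
    }
    // A partial update is only possible if the last full update went into the
    // heap that is still current.
    uint32_t count = previous_heap_index == current_heap_index
                         ? count_for_partial_update
                         : count_for_full_update;
    if (page_size - current_page_used < count) {
      // Not enough space: switch to a new heap, forcing a full update.
      ++current_heap_index;
      current_page_used = 0;
      count = count_for_full_update;
    }
    index_out = current_page_used;
    current_page_used += count;
    return current_heap_index;
  }
};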