[D3D12] Don't use D3D12Context for command processor fence

This commit is contained in:
Triang3l 2019-10-28 10:49:32 +03:00
parent b4af63fe31
commit d3b6f71ae1
16 changed files with 337 additions and 424 deletions

View File

@ -55,6 +55,7 @@ namespace xe {
namespace gpu {
namespace d3d12 {
constexpr uint32_t D3D12CommandProcessor::kQueuedFrames;
constexpr uint32_t
D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth;
@ -440,8 +441,8 @@ uint64_t D3D12CommandProcessor::RequestViewDescriptors(
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
uint32_t descriptor_index;
uint64_t current_heap_index = view_heap_pool_->Request(
GetD3D12Context()->GetCurrentFrame(), previous_heap_index,
count_for_partial_update, count_for_full_update, descriptor_index);
fence_current_value_, previous_heap_index, count_for_partial_update,
count_for_full_update, descriptor_index);
if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// There was an error.
return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
@ -466,8 +467,8 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors(
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
uint32_t descriptor_index;
uint64_t current_heap_index = sampler_heap_pool_->Request(
GetD3D12Context()->GetCurrentFrame(), previous_heap_index,
count_for_partial_update, count_for_full_update, descriptor_index);
fence_current_value_, previous_heap_index, count_for_partial_update,
count_for_full_update, descriptor_index);
if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
// There was an error.
return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
@ -490,9 +491,9 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors(
ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
uint32_t size, D3D12_RESOURCE_STATES state) {
assert_true(current_queue_frame_ != UINT_MAX);
assert_true(submission_open_);
assert_false(scratch_buffer_used_);
if (current_queue_frame_ == UINT_MAX || scratch_buffer_used_ || size == 0) {
if (!submission_open_ || scratch_buffer_used_ || size == 0) {
return nullptr;
}
@ -505,8 +506,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
size = xe::align(size, kScratchBufferSizeIncrement);
auto context = GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_RESOURCE_DESC buffer_desc;
ui::d3d12::util::FillBufferResourceDesc(
buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
@ -520,7 +520,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
if (scratch_buffer_ != nullptr) {
BufferForDeletion buffer_for_deletion;
buffer_for_deletion.buffer = scratch_buffer_;
buffer_for_deletion.last_usage_frame = GetD3D12Context()->GetCurrentFrame();
buffer_for_deletion.last_usage_fence_value = fence_current_value_;
buffers_for_deletion_.push_back(buffer_for_deletion);
}
scratch_buffer_ = buffer;
@ -532,7 +532,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) {
assert_true(current_queue_frame_ != UINT_MAX);
assert_true(submission_open_);
assert_true(scratch_buffer_used_);
scratch_buffer_used_ = false;
if (buffer == scratch_buffer_) {
@ -552,8 +552,10 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
if (cvars::d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM()) {
auto provider = GetD3D12Context()->GetD3D12Provider();
auto tier = provider->GetProgrammableSamplePositionsTier();
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
if (tier >= 2 &&
command_lists_[current_queue_frame_]->GetCommandList1() != nullptr) {
command_lists_[command_list_index]->GetCommandList1() != nullptr) {
// Depth buffer transitions are affected by sample positions.
SubmitBarriers();
// Standard sample positions in Direct3D 10.1, but adjusted to take the
@ -671,7 +673,7 @@ std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
EndFrame();
GetD3D12Context()->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = swap_texture_copy_footprint_.Offset;
readback_range.End = swap_texture_copy_size_;
@ -709,7 +711,20 @@ bool D3D12CommandProcessor::SetupContext() {
auto device = provider->GetDevice();
auto direct_queue = provider->GetDirectQueue();
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
IID_PPV_ARGS(&fence_)))) {
XELOGE("Failed to create the fence");
return false;
}
fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr);
if (fence_completion_event_ == nullptr) {
XELOGE("Failed to create the fence completion event");
return false;
}
fence_current_value_ = 1;
fence_completed_value_ = 0;
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
command_lists_[i] = ui::d3d12::CommandList::Create(
device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT);
if (command_lists_[i] == nullptr) {
@ -791,11 +806,11 @@ bool D3D12CommandProcessor::SetupContext() {
return false;
}
// Get the layout for the upload buffer.
gamma_ramp_desc.DepthOrArraySize = ui::d3d12::D3D12Context::kQueuedFrames;
gamma_ramp_desc.DepthOrArraySize = kQueuedFrames;
UINT64 gamma_ramp_upload_size;
device->GetCopyableFootprints(
&gamma_ramp_desc, 0, ui::d3d12::D3D12Context::kQueuedFrames * 2, 0,
gamma_ramp_footprints_, nullptr, nullptr, &gamma_ramp_upload_size);
device->GetCopyableFootprints(&gamma_ramp_desc, 0, kQueuedFrames * 2, 0,
gamma_ramp_footprints_, nullptr, nullptr,
&gamma_ramp_upload_size);
// Create the upload buffer for the gamma ramp.
ui::d3d12::util::FillBufferResourceDesc(
gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE);
@ -890,8 +905,7 @@ bool D3D12CommandProcessor::SetupContext() {
}
void D3D12CommandProcessor::ShutdownContext() {
auto context = GetD3D12Context();
context->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
ui::d3d12::util::ReleaseAndNull(readback_buffer_);
readback_buffer_size_ = 0;
@ -910,8 +924,8 @@ void D3D12CommandProcessor::ShutdownContext() {
swap_state_.pending = false;
swap_state_.front_buffer_texture = 0;
}
auto graphics_system = static_cast<D3D12GraphicsSystem*>(graphics_system_);
graphics_system->AwaitFrontBufferUnused();
// TODO(Triang3l): Ensure this is synchronized. The display context may not
// exist at this point, so awaiting its fence doesn't always work.
swap_texture_srv_descriptor_heap_->Release();
swap_texture_srv_descriptor_heap_ = nullptr;
}
@ -950,10 +964,19 @@ void D3D12CommandProcessor::ShutdownContext() {
shared_memory_.reset();
deferred_command_list_.reset();
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
command_lists_[i].reset();
}
// First release the fence since it may reference the event.
ui::d3d12::util::ReleaseAndNull(fence_);
if (fence_completion_event_) {
CloseHandle(fence_completion_event_);
fence_completion_event_ = nullptr;
}
fence_current_value_ = 1;
fence_completed_value_ = 0;
CommandProcessor::ShutdownContext();
}
@ -962,7 +985,7 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
if (current_queue_frame_ != UINT32_MAX) {
if (submission_open_) {
uint32_t float_constant_index =
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
if (float_constant_index >= 256) {
@ -1013,9 +1036,11 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
auto device = provider->GetDevice();
// Upload the new gamma ramps.
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
if (dirty_gamma_ramp_normal_) {
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
gamma_ramp_footprints_[current_queue_frame_ * 2];
gamma_ramp_footprints_[command_list_index * 2];
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
for (uint32_t i = 0; i < 256; ++i) {
@ -1041,7 +1066,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
}
if (dirty_gamma_ramp_pwl_) {
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
gamma_ramp_footprints_[current_queue_frame_ * 2 + 1];
gamma_ramp_footprints_[command_list_index * 2 + 1];
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
for (uint32_t i = 0; i < 128; ++i) {
@ -1153,7 +1178,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
if (cache_clear_requested_) {
cache_clear_requested_ = false;
GetD3D12Context()->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
scratch_buffer_size_ = 0;
@ -1191,8 +1216,7 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info) {
auto context = GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
@ -1685,7 +1709,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
readback_buffer_offset += memexport_range_size;
}
EndFrame();
context->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = 0;
readback_range.End = memexport_total_size;
@ -1718,7 +1742,7 @@ void D3D12CommandProcessor::InitializeTrace() {
anySubmitted |= shared_memory_->InitializeTraceSubmitDownloads();
if (anySubmitted) {
EndFrame();
GetD3D12Context()->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
shared_memory_->InitializeTraceCompleteDownloads();
}
}
@ -1748,7 +1772,7 @@ bool D3D12CommandProcessor::IssueCopy() {
readback_buffer, 0, shared_memory_buffer, written_address,
written_length);
EndFrame();
GetD3D12Context()->AwaitAllFramesCompletion();
AwaitAllSubmissionsCompletion();
D3D12_RANGE readback_range;
readback_range.Begin = 0;
readback_range.End = written_length;
@ -1766,7 +1790,7 @@ bool D3D12CommandProcessor::IssueCopy() {
}
bool D3D12CommandProcessor::BeginFrame() {
if (current_queue_frame_ != UINT32_MAX) {
if (submission_open_) {
return false;
}
@ -1774,17 +1798,23 @@ bool D3D12CommandProcessor::BeginFrame() {
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
auto context = GetD3D12Context();
auto provider = context->GetD3D12Provider();
context->BeginSwap();
current_queue_frame_ = context->GetCurrentQueueFrame();
submission_open_ = true;
// Wait for a swap command list to become free.
// Command list 0 is used when fence_current_value_ is 1, 4, 7...
fence_completed_value_ = fence_->GetCompletedValue();
if (fence_completed_value_ + kQueuedFrames < fence_current_value_) {
fence_->SetEventOnCompletion(fence_current_value_ - kQueuedFrames,
fence_completion_event_);
WaitForSingleObject(fence_completion_event_, INFINITE);
fence_completed_value_ = fence_->GetCompletedValue();
}
// Remove outdated temporary buffers.
uint64_t last_completed_frame = context->GetLastCompletedFrame();
auto erase_buffers_end = buffers_for_deletion_.begin();
while (erase_buffers_end != buffers_for_deletion_.end()) {
uint64_t upload_frame = erase_buffers_end->last_usage_frame;
if (upload_frame > last_completed_frame) {
uint64_t upload_fence_value = erase_buffers_end->last_usage_fence_value;
if (upload_fence_value > fence_completed_value_) {
++erase_buffers_end;
break;
}
@ -1830,16 +1860,17 @@ bool D3D12CommandProcessor::BeginFrame() {
pix_capturing_ =
pix_capture_requested_.exchange(false, std::memory_order_relaxed);
if (pix_capturing_) {
IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis();
IDXGraphicsAnalysis* graphics_analysis =
GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis();
if (graphics_analysis != nullptr) {
graphics_analysis->BeginCapture();
}
}
deferred_command_list_->Reset();
constant_buffer_pool_->Reclaim(last_completed_frame);
view_heap_pool_->Reclaim(last_completed_frame);
sampler_heap_pool_->Reclaim(last_completed_frame);
constant_buffer_pool_->Reclaim(fence_completed_value_);
view_heap_pool_->Reclaim(fence_completed_value_);
sampler_heap_pool_->Reclaim(fence_completed_value_);
shared_memory_->BeginFrame();
@ -1853,10 +1884,12 @@ bool D3D12CommandProcessor::BeginFrame() {
}
bool D3D12CommandProcessor::EndFrame() {
if (current_queue_frame_ == UINT32_MAX) {
if (!submission_open_) {
return false;
}
auto provider = GetD3D12Context()->GetD3D12Provider();
assert_false(scratch_buffer_used_);
pipeline_cache_->EndFrame();
@ -1870,28 +1903,40 @@ bool D3D12CommandProcessor::EndFrame() {
SubmitBarriers();
// Submit the command list.
auto current_command_list = command_lists_[current_queue_frame_].get();
uint32_t command_list_index =
uint32_t((fence_current_value_ + (kQueuedFrames - 1)) % kQueuedFrames);
auto current_command_list = command_lists_[command_list_index].get();
current_command_list->BeginRecording();
deferred_command_list_->Execute(current_command_list->GetCommandList(),
current_command_list->GetCommandList1());
current_command_list->Execute();
if (pix_capturing_) {
IDXGraphicsAnalysis* graphics_analysis =
GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis();
IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis();
if (graphics_analysis != nullptr) {
graphics_analysis->EndCapture();
}
pix_capturing_ = false;
}
auto context = GetD3D12Context();
context->EndSwap();
current_queue_frame_ = UINT32_MAX;
provider->GetDirectQueue()->Signal(fence_, fence_current_value_++);
submission_open_ = false;
return true;
}
// Blocks the calling thread until the GPU has signaled every fence value
// submitted so far (fence_current_value_ - 1), then records that value as
// completed.
//
// Returns immediately, without waiting, when nothing is outstanding, when the
// fence has not been created, or when the D3D12 context reports that it was
// lost.
void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {
  // May be called if shutting down without everything set up.
  if ((fence_completed_value_ + 1) >= fence_current_value_ || !fence_ ||
      GetD3D12Context()->WasLost()) {
    return;
  }
  const uint64_t last_submitted_value = fence_current_value_ - 1;
  // SetEventOnCompletion returns an HRESULT; a negative HRESULT is a failure
  // (the same condition the FAILED macro tests). If the event could not be
  // armed, nothing will ever signal it, so an INFINITE wait would hang this
  // thread forever - skip the wait and leave fence_completed_value_ untouched.
  if (fence_->SetEventOnCompletion(last_submitted_value,
                                   fence_completion_event_) < 0) {
    return;
  }
  WaitForSingleObject(fence_completion_event_, INFINITE);
  fence_completed_value_ = last_submitted_value;
}
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
auto& regs = *register_file_;
@ -2626,10 +2671,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
bool D3D12CommandProcessor::UpdateBindings(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
ID3D12RootSignature* root_signature) {
auto context = GetD3D12Context();
auto provider = context->GetD3D12Provider();
auto provider = GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto current_frame = context->GetCurrentFrame();
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
@ -2765,8 +2808,9 @@ bool D3D12CommandProcessor::UpdateBindings(
// Update constant buffers.
if (!cbuffer_bindings_system_.up_to_date) {
uint8_t* system_constants = constant_buffer_pool_->Request(
current_frame, xe::align(uint32_t(sizeof(system_constants_)), 256u),
nullptr, nullptr, &cbuffer_bindings_system_.buffer_address);
fence_current_value_,
xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr,
&cbuffer_bindings_system_.buffer_address);
if (system_constants == nullptr) {
return false;
}
@ -2777,7 +2821,7 @@ bool D3D12CommandProcessor::UpdateBindings(
}
if (!cbuffer_bindings_float_vertex_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->Request(
current_frame, float_constant_size_vertex, nullptr, nullptr,
fence_current_value_, float_constant_size_vertex, nullptr, nullptr,
&cbuffer_bindings_float_vertex_.buffer_address);
if (float_constants == nullptr) {
return false;
@ -2802,7 +2846,7 @@ bool D3D12CommandProcessor::UpdateBindings(
}
if (!cbuffer_bindings_float_pixel_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->Request(
current_frame, float_constant_size_pixel, nullptr, nullptr,
fence_current_value_, float_constant_size_pixel, nullptr, nullptr,
&cbuffer_bindings_float_pixel_.buffer_address);
if (float_constants == nullptr) {
return false;
@ -2832,7 +2876,7 @@ bool D3D12CommandProcessor::UpdateBindings(
if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint32_t* bool_loop_constants =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
current_frame, 768, nullptr, nullptr,
fence_current_value_, 768, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address));
if (bool_loop_constants == nullptr) {
return false;
@ -2851,9 +2895,9 @@ bool D3D12CommandProcessor::UpdateBindings(
write_bool_loop_constant_view = true;
}
if (!cbuffer_bindings_fetch_.up_to_date) {
uint8_t* fetch_constants =
constant_buffer_pool_->Request(current_frame, 768, nullptr, nullptr,
&cbuffer_bindings_fetch_.buffer_address);
uint8_t* fetch_constants = constant_buffer_pool_->Request(
fence_current_value_, 768, nullptr, nullptr,
&cbuffer_bindings_fetch_.buffer_address);
if (fetch_constants == nullptr) {
return false;
}
@ -3200,8 +3244,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) {
}
size = xe::align(size, kReadbackBufferSizeIncrement);
if (size > readback_buffer_size_) {
auto context = GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_RESOURCE_DESC buffer_desc;
ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size,
D3D12_RESOURCE_FLAG_NONE);

View File

@ -63,6 +63,9 @@ class D3D12CommandProcessor : public CommandProcessor {
// targets.
bool IsROVUsedForEDRAM() const;
uint64_t GetCurrentFenceValue() const { return fence_current_value_; }
uint64_t GetCompletedFenceValue() const { return fence_completed_value_; }
// Gets the current color write mask, taking the pixel shader's write mask
// into account. If a shader doesn't write to a render target, it shouldn't be
// written to and it shouldn't be even bound - otherwise, in Halo 3, one
@ -167,6 +170,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void FinalizeTrace() override;
private:
static constexpr uint32_t kQueuedFrames = 3;
enum RootParameter : UINT {
// These are always present.
@ -215,6 +220,7 @@ class D3D12CommandProcessor : public CommandProcessor {
bool BeginFrame();
// Returns true if an open frame was ended.
bool EndFrame();
void AwaitAllSubmissionsCompletion();
void UpdateFixedFunctionState(bool primitive_two_faced);
void UpdateSystemConstantValues(
@ -239,8 +245,12 @@ class D3D12CommandProcessor : public CommandProcessor {
bool cache_clear_requested_ = false;
std::unique_ptr<ui::d3d12::CommandList>
command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
uint64_t fence_current_value_ = 1;
uint64_t fence_completed_value_ = 0;
HANDLE fence_completion_event_ = nullptr;
ID3D12Fence* fence_ = nullptr;
std::unique_ptr<ui::d3d12::CommandList> command_lists_[kQueuedFrames] = {};
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
@ -265,11 +275,10 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12Resource* gamma_ramp_texture_ = nullptr;
D3D12_RESOURCE_STATES gamma_ramp_texture_state_;
// Upload buffer for an image that is the same as gamma_ramp_, but with
// ui::d3d12::D3D12Context::kQueuedFrames array layers.
// kQueuedFrames array layers.
ID3D12Resource* gamma_ramp_upload_ = nullptr;
uint8_t* gamma_ramp_upload_mapping_ = nullptr;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT
gamma_ramp_footprints_[ui::d3d12::D3D12Context::kQueuedFrames * 2];
D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_ramp_footprints_[kQueuedFrames * 2];
static constexpr uint32_t kSwapTextureWidth = 1280;
static constexpr uint32_t kSwapTextureHeight = 720;
@ -291,7 +300,7 @@ class D3D12CommandProcessor : public CommandProcessor {
struct BufferForDeletion {
ID3D12Resource* buffer;
uint64_t last_usage_frame;
uint64_t last_usage_fence_value;
};
std::deque<BufferForDeletion> buffers_for_deletion_;
@ -305,7 +314,7 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12Resource* readback_buffer_ = nullptr;
uint32_t readback_buffer_size_ = 0;
uint32_t current_queue_frame_ = UINT32_MAX;
bool submission_open_ = false;
std::atomic<bool> pix_capture_requested_ = false;
bool pix_capturing_;

View File

@ -199,12 +199,6 @@ std::unique_ptr<xe::ui::RawImage> D3D12GraphicsSystem::Capture() {
return d3d12_command_processor->Capture();
}
// Waits for all frames of the display context to complete. Does nothing when
// there is no display context.
void D3D12GraphicsSystem::AwaitFrontBufferUnused() {
  if (display_context_ == nullptr) {
    return;
  }
  display_context_->AwaitAllFramesCompletion();
}
void D3D12GraphicsSystem::StretchTextureToFrontBuffer(
D3D12_GPU_DESCRIPTOR_HANDLE handle,
D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size,

View File

@ -36,8 +36,6 @@ class D3D12GraphicsSystem : public GraphicsSystem {
std::unique_ptr<xe::ui::RawImage> Capture() override;
void AwaitFrontBufferUnused();
// Draws a texture covering the entire viewport to the render target currently
// bound on the specified command list (in D3D12Context::kSwapChainFormat).
// This changes the current pipeline, graphics root signature and primitive

View File

@ -112,7 +112,7 @@ bool PrimitiveConverter::Initialize() {
}
static_ib_upload_->Unmap(0, nullptr);
// Not uploaded yet.
static_ib_upload_frame_ = UINT64_MAX;
static_ib_upload_fence_value_ = UINT64_MAX;
if (FAILED(device->CreateCommittedResource(
&ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
&static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
@ -143,27 +143,27 @@ void PrimitiveConverter::Shutdown() {
void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
void PrimitiveConverter::BeginFrame() {
uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue();
// Got a command list now - upload and transition the static index buffer if
// needed.
if (static_ib_upload_ != nullptr) {
auto context = command_processor_->GetD3D12Context();
if (static_ib_upload_frame_ == UINT64_MAX) {
if (static_ib_upload_fence_value_ == UINT64_MAX) {
// Not uploaded yet - upload.
command_processor_->GetDeferredCommandList()->D3DCopyResource(
static_ib_, static_ib_upload_);
command_processor_->PushTransitionBarrier(
static_ib_, D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_INDEX_BUFFER);
static_ib_upload_frame_ = context->GetCurrentFrame();
} else if (context->GetLastCompletedFrame() >= static_ib_upload_frame_) {
static_ib_upload_fence_value_ =
command_processor_->GetCurrentFenceValue();
} else if (completed_fence_value >= static_ib_upload_fence_value_) {
// Completely uploaded - release the upload buffer.
static_ib_upload_->Release();
static_ib_upload_ = nullptr;
}
}
buffer_pool_->Reclaim(
command_processor_->GetD3D12Context()->GetLastCompletedFrame());
buffer_pool_->Reclaim(completed_fence_value);
converted_indices_cache_.clear();
memory_regions_used_ = 0;
@ -694,9 +694,9 @@ void* PrimitiveConverter::AllocateIndices(
size += 16;
}
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
uint8_t* mapping = buffer_pool_->Request(
command_processor_->GetD3D12Context()->GetCurrentFrame(), size, nullptr,
nullptr, &gpu_address);
uint8_t* mapping =
buffer_pool_->Request(command_processor_->GetCurrentFenceValue(), size,
nullptr, nullptr, &gpu_address);
if (mapping == nullptr) {
XELOGE("Failed to allocate space for %u converted %u-bit vertex indices",
count, format == IndexFormat::kInt32 ? 32 : 16);

View File

@ -111,7 +111,7 @@ class PrimitiveConverter {
// CPU-side, used only for uploading - destroyed once the copy commands have
// been completed.
ID3D12Resource* static_ib_upload_ = nullptr;
uint64_t static_ib_upload_frame_;
uint64_t static_ib_upload_fence_value_;
// GPU-side - used for drawing.
ID3D12Resource* static_ib_ = nullptr;
D3D12_GPU_VIRTUAL_ADDRESS static_ib_gpu_address_;

View File

@ -167,8 +167,7 @@ void SharedMemory::Shutdown() {
}
void SharedMemory::BeginFrame() {
upload_buffer_pool_->Reclaim(
command_processor_->GetD3D12Context()->GetLastCompletedFrame());
upload_buffer_pool_->Reclaim(command_processor_->GetCompletedFenceValue());
heap_creation_failed_ = false;
}
@ -329,8 +328,8 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
UINT range_tile_count = kHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
// FIXME(Triang3l): This may cause issues if the emulator is shut down
// mid-frame and the heaps are destroyed before tile mappings are updated
// (AwaitAllFramesCompletion won't catch this then). Defer this until the
// actual command list submission at the end of the frame.
// (awaiting the fence won't catch this then). Defer this until the actual
// command list submission at the end of the frame.
direct_queue->UpdateTileMappings(
buffer_, 1, &region_start_coordinates, &region_size, heaps_[i], 1,
&range_flags, &heap_range_start_offset, &range_tile_count,
@ -376,7 +375,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
ID3D12Resource* upload_buffer;
uint32_t upload_buffer_offset, upload_buffer_size;
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
command_processor_->GetD3D12Context()->GetCurrentFrame(),
command_processor_->GetCurrentFenceValue(),
upload_range_length << page_size_log2_, &upload_buffer,
&upload_buffer_offset, &upload_buffer_size, nullptr);
if (upload_buffer_mapping == nullptr) {

View File

@ -1169,8 +1169,7 @@ void TextureCache::BeginFrame() {
texture_current_usage_time_ = xe::Clock::QueryHostUptimeMillis();
// If memory usage is too high, destroy unused textures.
uint64_t last_completed_frame =
command_processor_->GetD3D12Context()->GetLastCompletedFrame();
uint64_t completed_fence_value = command_processor_->GetCompletedFenceValue();
uint32_t limit_soft_mb = cvars::d3d12_texture_cache_limit_soft;
uint32_t limit_hard_mb = cvars::d3d12_texture_cache_limit_hard;
if (IsResolutionScale2X()) {
@ -1187,7 +1186,7 @@ void TextureCache::BeginFrame() {
break;
}
Texture* texture = texture_used_first_;
if (texture->last_usage_frame > last_completed_frame) {
if (texture->last_usage_fence_value > completed_fence_value) {
break;
}
if (!limit_hard_exceeded &&
@ -1956,8 +1955,8 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled,
kScaledResolveHeapSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
// FIXME(Triang3l): This may cause issues if the emulator is shut down
// mid-frame and the heaps are destroyed before tile mappings are updated
// (AwaitAllFramesCompletion won't catch this then). Defer this until the
// actual command list submission at the end of the frame.
// (awaiting the fence won't catch this then). Defer this until the actual
// command list submission.
direct_queue->UpdateTileMappings(
scaled_resolve_buffer_, 1, &region_start_coordinates, &region_size,
scaled_resolve_heaps_[i], 1, &range_flags, &heap_range_start_offset,
@ -2293,8 +2292,8 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
// Untiling through a buffer instead of using unordered access because copying
// is not done that often.
desc.Flags = D3D12_RESOURCE_FLAG_NONE;
auto context = command_processor_->GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
// Assuming untiling will be the next operation.
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
ID3D12Resource* resource;
@ -2312,7 +2311,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
texture->resource_size =
device->GetResourceAllocationInfo(0, 1, &desc).SizeInBytes;
texture->state = state;
texture->last_usage_frame = context->GetCurrentFrame();
texture->last_usage_fence_value = command_processor_->GetCurrentFenceValue();
texture->last_usage_time = texture_current_usage_time_;
texture->used_previous = texture_used_last_;
texture->used_next = nullptr;
@ -2406,8 +2405,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
auto command_list = command_processor_->GetDeferredCommandList();
auto context = command_processor_->GetD3D12Context();
auto provider = context->GetD3D12Provider();
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
// Get the pipeline.
@ -2608,7 +2606,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
uint8_t* cbuffer_mapping = cbuffer_pool->Request(
context->GetCurrentFrame(),
command_processor_->GetCurrentFenceValue(),
xe::align(uint32_t(sizeof(load_constants)), 256u), nullptr, nullptr,
&cbuffer_gpu_address);
if (cbuffer_mapping == nullptr) {
@ -2686,11 +2684,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
void TextureCache::MarkTextureUsed(Texture* texture) {
uint64_t current_frame =
command_processor_->GetD3D12Context()->GetCurrentFrame();
uint64_t current_fence_value = command_processor_->GetCurrentFenceValue();
// This is called very frequently, don't relink unless needed for caching.
if (texture->last_usage_frame != current_frame) {
texture->last_usage_frame = current_frame;
if (texture->last_usage_fence_value != current_fence_value) {
texture->last_usage_fence_value = current_fence_value;
texture->last_usage_time = texture_current_usage_time_;
if (texture->used_next == nullptr) {
// Simplify the code a bit - already in the end of the list.

View File

@ -343,7 +343,7 @@ class TextureCache {
uint64_t resource_size;
D3D12_RESOURCE_STATES state;
uint64_t last_usage_frame;
uint64_t last_usage_fence_value;
uint64_t last_usage_time;
Texture* used_previous;
Texture* used_next;

View File

@ -1,75 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/d3d12/cpu_fence.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace d3d12 {
// Factory for CPUFence: constructs the object and initializes it. Returns
// nullptr if Initialize fails, in which case the partially built object is
// destroyed before returning.
std::unique_ptr<CPUFence> CPUFence::Create(ID3D12Device* device,
                                           ID3D12CommandQueue* queue) {
  std::unique_ptr<CPUFence> created(new CPUFence(device, queue));
  if (created->Initialize()) {
    return created;
  }
  return nullptr;
}
CPUFence::CPUFence(ID3D12Device* device, ID3D12CommandQueue* queue)
: device_(device), queue_(queue) {}
// Releases the fence and closes the completion event, if they were created.
CPUFence::~CPUFence() {
  // The fence goes first because it may still reference the event.
  if (fence_) {
    fence_->Release();
  }
  if (completion_event_) {
    CloseHandle(completion_event_);
  }
}
// Creates the D3D12 fence (initial value 0) and the event used to wait for it.
// Returns true on success. On failure, logs an error and returns false; if the
// event could not be created, the already created fence is released and the
// pointer is reset to null.
bool CPUFence::Initialize() {
  if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE,
                                  IID_PPV_ARGS(&fence_)))) {
    XELOGE("Failed to create a fence");
    return false;
  }

  completion_event_ = CreateEvent(nullptr, false, false, nullptr);
  if (completion_event_ != nullptr) {
    // Nothing has been signaled on the queue yet.
    queued_value_ = 0;
    return true;
  }

  // No event - undo the fence creation.
  XELOGE("Failed to create a fence completion event");
  fence_->Release();
  fence_ = nullptr;
  return false;
}
// Advances the queued value by one and asks the command queue to signal the
// fence with the new value.
void CPUFence::Enqueue() {
  queue_->Signal(fence_, ++queued_value_);
}
// Non-blocking check: true once the fence's completed value has reached the
// most recently enqueued value.
bool CPUFence::IsCompleted() {
  const auto completed_value = fence_->GetCompletedValue();
  return completed_value >= queued_value_;
}
// Blocks the calling thread until the fence's completed value reaches the most
// recently enqueued value. Returns immediately if it already has.
void CPUFence::Await() {
  if (fence_->GetCompletedValue() >= queued_value_) {
    return;
  }
  // SetEventOnCompletion returns an HRESULT; a negative HRESULT is a failure
  // (the same condition the FAILED macro tests). If the event could not be
  // armed, nothing will ever signal it, so an INFINITE wait would hang this
  // thread forever - skip the wait instead.
  if (fence_->SetEventOnCompletion(queued_value_, completion_event_) < 0) {
    return;
  }
  WaitForSingleObject(completion_event_, INFINITE);
}
} // namespace d3d12
} // namespace ui
} // namespace xe

View File

@ -1,52 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_D3D12_CPU_FENCE_H_
#define XENIA_UI_D3D12_CPU_FENCE_H_
#include <memory>
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace ui {
namespace d3d12 {
// A D3D12 fence paired with a Win32 event, used by the CPU to find out (or
// wait until) the GPU command queue has reached a previously enqueued point.
// Instances are created through Create() only.
class CPUFence {
 public:
  ~CPUFence();

  // The object holds a raw fence reference and an event handle that the
  // destructor releases, so a copy would release both a second time.
  CPUFence(const CPUFence&) = delete;
  CPUFence& operator=(const CPUFence&) = delete;

  // Creates a fence for the given device that is signaled on the given queue.
  // Returns null if the fence or its completion event could not be created.
  static std::unique_ptr<CPUFence> Create(ID3D12Device* device,
                                          ID3D12CommandQueue* queue);

  // Submits the fence to the GPU command queue.
  void Enqueue();

  // Immediately returns whether the GPU has reached the fence.
  bool IsCompleted();
  // Blocks until the fence has been reached.
  void Await();

 private:
  CPUFence(ID3D12Device* device, ID3D12CommandQueue* queue);
  bool Initialize();

  // Stored as passed to Create(); this class does not release them.
  ID3D12Device* device_;
  ID3D12CommandQueue* queue_;

  // Created in Initialize(), released in the destructor (fence first).
  ID3D12Fence* fence_ = nullptr;
  HANDLE completion_event_ = nullptr;
  // The value most recently passed to the queue's Signal by Enqueue(); 0
  // until the first Enqueue().
  uint64_t queued_value_ = 0;
};
} // namespace d3d12
} // namespace ui
} // namespace xe
#endif // XENIA_UI_D3D12_CPU_FENCE_H_

View File

@ -16,6 +16,7 @@
#include "xenia/base/math.h"
#include "xenia/ui/d3d12/d3d12_immediate_drawer.h"
#include "xenia/ui/d3d12/d3d12_provider.h"
#include "xenia/ui/d3d12/d3d12_util.h"
#include "xenia/ui/window.h"
DEFINE_bool(d3d12_random_clear_color, false,
@ -25,6 +26,9 @@ namespace xe {
namespace ui {
namespace d3d12 {
constexpr uint32_t D3D12Context::kSwapCommandListCount;
constexpr uint32_t D3D12Context::kSwapChainBufferCount;
D3D12Context::D3D12Context(D3D12Provider* provider, Window* target_window)
: GraphicsContext(provider, target_window) {}
@ -38,23 +42,24 @@ bool D3D12Context::Initialize() {
context_lost_ = false;
current_frame_ = 1;
// No frames have been completed yet.
last_completed_frame_ = 0;
// Keep in sync with the modulo because why not.
current_queue_frame_ = 1;
// Create fences for synchronization of reuse and destruction of transient
// objects (like command lists) and for global shutdown.
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
fences_[i] = CPUFence::Create(device, direct_queue);
if (fences_[i] == nullptr) {
if (target_window_) {
swap_fence_current_value_ = 1;
swap_fence_completed_value_ = 0;
swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr);
if (swap_fence_completion_event_ == nullptr) {
XELOGE("Failed to create the composition fence completion event");
Shutdown();
return false;
}
// Create a fence for transient resources of compositing.
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
IID_PPV_ARGS(&swap_fence_)))) {
XELOGE("Failed to create the composition fence");
Shutdown();
return false;
}
}
if (target_window_) {
// Create the swap chain.
swap_chain_width_ = target_window_->scaled_width();
swap_chain_height_ = target_window_->scaled_height();
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc;
@ -109,7 +114,7 @@ bool D3D12Context::Initialize() {
}
// Create command lists for compositing.
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
for (uint32_t i = 0; i < kSwapCommandListCount; ++i) {
swap_command_lists_[i] = CommandList::Create(
device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT);
if (swap_command_lists_[i] == nullptr) {
@ -126,7 +131,6 @@ bool D3D12Context::Initialize() {
}
}
initialized_fully_ = true;
return true;
}
@ -159,29 +163,30 @@ bool D3D12Context::InitializeSwapChainBuffers() {
}
void D3D12Context::Shutdown() {
if (initialized_fully_ && !context_lost_) {
AwaitAllFramesCompletion();
if (!context_lost_ && swap_fence_ &&
swap_fence_->GetCompletedValue() + 1 < swap_fence_current_value_) {
swap_fence_->SetEventOnCompletion(swap_fence_current_value_ - 1,
swap_fence_completion_event_);
WaitForSingleObject(swap_fence_completion_event_, INFINITE);
}
initialized_fully_ = false;
immediate_drawer_.reset();
if (swap_chain_ != nullptr) {
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
swap_command_lists_[i].reset();
}
for (uint32_t i = 0; i < kSwapCommandListCount; ++i) {
swap_command_lists_[i].reset();
}
if (swap_chain_) {
for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) {
auto& buffer = swap_chain_buffers_[i];
if (buffer == nullptr) {
auto& swap_chain_buffer = swap_chain_buffers_[i];
if (!swap_chain_buffer) {
break;
}
buffer->Release();
buffer = nullptr;
swap_chain_buffer->Release();
swap_chain_buffer = nullptr;
}
if (swap_chain_rtv_heap_ != nullptr) {
if (swap_chain_rtv_heap_) {
swap_chain_rtv_heap_->Release();
swap_chain_rtv_heap_ = nullptr;
}
@ -189,9 +194,14 @@ void D3D12Context::Shutdown() {
swap_chain_->Release();
}
for (uint32_t i = 0; i < kQueuedFrames; ++i) {
fences_[i].reset();
// First release the fence since it may reference the event.
util::ReleaseAndNull(swap_fence_);
if (swap_fence_completion_event_) {
CloseHandle(swap_fence_completion_event_);
swap_fence_completion_event_ = nullptr;
}
swap_fence_current_value_ = 1;
swap_fence_completed_value_ = 0;
}
ImmediateDrawer* D3D12Context::immediate_drawer() {
@ -205,119 +215,125 @@ bool D3D12Context::MakeCurrent() { return true; }
void D3D12Context::ClearCurrent() {}
void D3D12Context::BeginSwap() {
if (context_lost_) {
if (!target_window_ || context_lost_) {
return;
}
// Await the availability of transient objects for the new frame.
// The frame number is incremented in EndSwap so it can be treated the same
// way both when inside a frame and when outside of it (it's tied to actual
// submissions).
fences_[current_queue_frame_]->Await();
// Update the completed frame if didn't explicitly await all queued frames.
if (last_completed_frame_ + kQueuedFrames < current_frame_) {
last_completed_frame_ = current_frame_ - kQueuedFrames;
}
if (target_window_ != nullptr) {
// Resize the swap chain if the window is resized.
uint32_t target_window_width = target_window_->scaled_width();
uint32_t target_window_height = target_window_->scaled_height();
if (swap_chain_width_ != target_window_width ||
swap_chain_height_ != target_window_height) {
// Await the completion of swap chain use.
// Context loss is also faked if resizing fails. In this case, before the
// context is shut down to be recreated, frame completion must be awaited
// (this isn't done if the context is truly lost).
AwaitAllFramesCompletion();
// All buffer references must be released before resizing.
for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) {
swap_chain_buffers_[i]->Release();
swap_chain_buffers_[i] = nullptr;
}
if (FAILED(swap_chain_->ResizeBuffers(
kSwapChainBufferCount, target_window_width, target_window_height,
kSwapChainFormat, 0))) {
context_lost_ = true;
return;
}
swap_chain_width_ = target_window_width;
swap_chain_height_ = target_window_height;
if (!InitializeSwapChainBuffers()) {
context_lost_ = true;
return;
}
// Resize the swap chain if the window is resized.
uint32_t target_window_width = target_window_->scaled_width();
uint32_t target_window_height = target_window_->scaled_height();
if (swap_chain_width_ != target_window_width ||
swap_chain_height_ != target_window_height) {
// Await the completion of swap chain use.
// Context loss is also faked if resizing fails. In this case, before the
// context is shut down to be recreated, frame completion must be awaited
// (this isn't done if the context is truly lost).
if (swap_fence_completed_value_ + 1 < swap_fence_current_value_) {
swap_fence_->SetEventOnCompletion(swap_fence_current_value_ - 1,
swap_fence_completion_event_);
WaitForSingleObject(swap_fence_completion_event_, INFINITE);
swap_fence_completed_value_ = swap_fence_current_value_ - 1;
}
// Bind the back buffer as a render target and clear it.
auto command_list = swap_command_lists_[current_queue_frame_].get();
auto graphics_command_list = command_list->BeginRecording();
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource =
swap_chain_buffers_[swap_chain_back_buffer_index_];
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
graphics_command_list->ResourceBarrier(1, &barrier);
D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV();
graphics_command_list->OMSetRenderTargets(1, &back_buffer_rtv, TRUE,
nullptr);
float clear_color[4];
if (cvars::d3d12_random_clear_color) {
clear_color[0] =
rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
clear_color[1] = 1.0f;
clear_color[2] = 0.0f;
} else {
clear_color[0] = 238.0f / 255.0f;
clear_color[1] = 238.0f / 255.0f;
clear_color[2] = 238.0f / 255.0f;
// All buffer references must be released before resizing.
for (uint32_t i = 0; i < kSwapChainBufferCount; ++i) {
swap_chain_buffers_[i]->Release();
swap_chain_buffers_[i] = nullptr;
}
clear_color[3] = 1.0f;
graphics_command_list->ClearRenderTargetView(back_buffer_rtv, clear_color,
0, nullptr);
}
}
void D3D12Context::EndSwap() {
if (context_lost_) {
return;
}
if (target_window_ != nullptr) {
// Switch the back buffer to presentation state.
auto command_list = swap_command_lists_[current_queue_frame_].get();
auto graphics_command_list = command_list->GetCommandList();
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource =
swap_chain_buffers_[swap_chain_back_buffer_index_];
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
graphics_command_list->ResourceBarrier(1, &barrier);
command_list->Execute();
// Present and check if the context was lost.
HRESULT result = swap_chain_->Present(0, 0);
if (result == DXGI_ERROR_DEVICE_RESET ||
result == DXGI_ERROR_DEVICE_REMOVED) {
if (FAILED(swap_chain_->ResizeBuffers(
kSwapChainBufferCount, target_window_width, target_window_height,
kSwapChainFormat, 0))) {
context_lost_ = true;
return;
}
swap_chain_width_ = target_window_width;
swap_chain_height_ = target_window_height;
if (!InitializeSwapChainBuffers()) {
context_lost_ = true;
return;
}
// Get the back buffer index for the next frame.
swap_chain_back_buffer_index_ = swap_chain_->GetCurrentBackBufferIndex();
}
// Go to the next transient object frame.
fences_[current_queue_frame_]->Enqueue();
++current_queue_frame_;
if (current_queue_frame_ >= kQueuedFrames) {
current_queue_frame_ -= kQueuedFrames;
// Wait for a swap command list to become free.
// Command list 0 is used when swap_fence_current_value_ is 1, 4, 7...
swap_fence_completed_value_ = swap_fence_->GetCompletedValue();
if (swap_fence_completed_value_ + kSwapCommandListCount <
swap_fence_current_value_) {
swap_fence_->SetEventOnCompletion(
swap_fence_current_value_ - kSwapCommandListCount,
swap_fence_completion_event_);
WaitForSingleObject(swap_fence_completion_event_, INFINITE);
swap_fence_completed_value_ = swap_fence_->GetCompletedValue();
}
++current_frame_;
// Bind the back buffer as a render target and clear it.
uint32_t command_list_index =
uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) %
kSwapCommandListCount);
auto command_list = swap_command_lists_[command_list_index].get();
auto graphics_command_list = command_list->BeginRecording();
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource =
swap_chain_buffers_[swap_chain_back_buffer_index_];
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
graphics_command_list->ResourceBarrier(1, &barrier);
D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV();
graphics_command_list->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, nullptr);
float clear_color[4];
if (cvars::d3d12_random_clear_color) {
clear_color[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
clear_color[1] = 1.0f;
clear_color[2] = 0.0f;
} else {
clear_color[0] = 238.0f / 255.0f;
clear_color[1] = 238.0f / 255.0f;
clear_color[2] = 238.0f / 255.0f;
}
clear_color[3] = 1.0f;
graphics_command_list->ClearRenderTargetView(back_buffer_rtv, clear_color, 0,
nullptr);
}
void D3D12Context::EndSwap() {
if (!target_window_ || context_lost_) {
return;
}
// Switch the back buffer to presentation state.
uint32_t command_list_index =
uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) %
kSwapCommandListCount);
auto command_list = swap_command_lists_[command_list_index].get();
auto graphics_command_list = command_list->GetCommandList();
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource =
swap_chain_buffers_[swap_chain_back_buffer_index_];
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
graphics_command_list->ResourceBarrier(1, &barrier);
command_list->Execute();
// Present and check if the context was lost.
HRESULT result = swap_chain_->Present(0, 0);
if (result == DXGI_ERROR_DEVICE_RESET ||
result == DXGI_ERROR_DEVICE_REMOVED) {
context_lost_ = true;
return;
}
// Signal the fence to wait for frame resources to become free again.
GetD3D12Provider()->GetDirectQueue()->Signal(swap_fence_,
swap_fence_current_value_++);
// Get the back buffer index for the next frame.
swap_chain_back_buffer_index_ = swap_chain_->GetCurrentBackBufferIndex();
}
std::unique_ptr<RawImage> D3D12Context::Capture() {
@ -325,19 +341,6 @@ std::unique_ptr<RawImage> D3D12Context::Capture() {
return nullptr;
}
void D3D12Context::AwaitAllFramesCompletion() {
// Await the last frame since previous frames must be completed before it.
if (context_lost_) {
return;
}
uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1);
if (await_frame >= kQueuedFrames) {
await_frame -= kQueuedFrames;
}
fences_[await_frame]->Await();
last_completed_frame_ = current_frame_ - 1;
}
D3D12_CPU_DESCRIPTOR_HANDLE D3D12Context::GetSwapChainBufferRTV(
uint32_t buffer_index) const {
return GetD3D12Provider()->OffsetRTVDescriptor(swap_chain_rtv_heap_start_,

View File

@ -13,7 +13,6 @@
#include <memory>
#include "xenia/ui/d3d12/command_list.h"
#include "xenia/ui/d3d12/cpu_fence.h"
#include "xenia/ui/d3d12/d3d12_immediate_drawer.h"
#include "xenia/ui/d3d12/d3d12_provider.h"
#include "xenia/ui/graphics_context.h"
@ -45,16 +44,6 @@ class D3D12Context : public GraphicsContext {
return static_cast<D3D12Provider*>(provider_);
}
// The count of copies of transient objects (like command lists, dynamic
// descriptor heaps) that must be kept when rendering with this context.
static constexpr uint32_t kQueuedFrames = 3;
// The current absolute frame number.
uint64_t GetCurrentFrame() { return current_frame_; }
// The last completed frame - it's fine to destroy objects used in it.
uint64_t GetLastCompletedFrame() { return last_completed_frame_; }
uint32_t GetCurrentQueueFrame() { return current_queue_frame_; }
void AwaitAllFramesCompletion();
static constexpr DXGI_FORMAT kSwapChainFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
ID3D12Resource* GetSwapChainBuffer(uint32_t buffer_index) const {
return swap_chain_buffers_[buffer_index];
@ -71,8 +60,18 @@ class D3D12Context : public GraphicsContext {
width = swap_chain_width_;
height = swap_chain_height_;
}
// Fence value of the frame being built inside the current BeginSwap/EndSwap
// pair: EndSwap signals this value on the swap fence and then increments it.
// Transient resources used in the frame are tagged with it.
uint64_t GetSwapCurrentFenceValue() const {
return swap_fence_current_value_;
}
// Cached completed value of the swap fence, not a live query of the fence:
// it is refreshed in BeginSwap (and after the wait that precedes a swap chain
// resize). Resources tagged with a value not greater than this are no longer
// in use by the GPU.
uint64_t GetSwapCompletedFenceValue() const {
return swap_fence_completed_value_;
}
ID3D12GraphicsCommandList* GetSwapCommandList() const {
return swap_command_lists_[current_queue_frame_]->GetCommandList();
uint32_t command_list_index =
uint32_t((swap_fence_current_value_ + (kSwapCommandListCount - 1)) %
kSwapCommandListCount);
return swap_command_lists_[command_list_index]->GetCommandList();
}
private:
@ -85,15 +84,8 @@ class D3D12Context : public GraphicsContext {
bool InitializeSwapChainBuffers();
void Shutdown();
bool initialized_fully_ = false;
bool context_lost_ = false;
uint64_t current_frame_ = 1;
uint64_t last_completed_frame_ = 0;
uint32_t current_queue_frame_ = 1;
std::unique_ptr<CPUFence> fences_[kQueuedFrames] = {};
static constexpr uint32_t kSwapChainBufferCount = 3;
IDXGISwapChain3* swap_chain_ = nullptr;
uint32_t swap_chain_width_ = 0, swap_chain_height_ = 0;
@ -101,7 +93,17 @@ class D3D12Context : public GraphicsContext {
uint32_t swap_chain_back_buffer_index_ = 0;
ID3D12DescriptorHeap* swap_chain_rtv_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE swap_chain_rtv_heap_start_;
std::unique_ptr<CommandList> swap_command_lists_[kQueuedFrames] = {};
uint64_t swap_fence_current_value_ = 1;
uint64_t swap_fence_completed_value_ = 0;
HANDLE swap_fence_completion_event_ = nullptr;
ID3D12Fence* swap_fence_ = nullptr;
static constexpr uint32_t kSwapCommandListCount = 3;
std::unique_ptr<CommandList> swap_command_lists_[kSwapCommandListCount] = {};
// Current is
// ((swap_fence_current_value_ + (kSwapCommandListCount - 1))) %
// kSwapCommandListCount.
std::unique_ptr<D3D12ImmediateDrawer> immediate_drawer_ = nullptr;
};

View File

@ -399,7 +399,7 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture,
&location_source, nullptr);
SubmittedTextureUpload submitted_upload;
submitted_upload.buffer = buffer;
submitted_upload.frame = context_->GetCurrentFrame();
submitted_upload.fence_value = context_->GetSwapCurrentFenceValue();
texture_uploads_submitted_.push_back(submitted_upload);
} else {
// Defer uploading to the next frame when there's a command list.
@ -417,14 +417,14 @@ void D3D12ImmediateDrawer::Begin(int render_target_width,
// Use the compositing command list.
current_command_list_ = context_->GetSwapCommandList();
uint64_t current_frame = context_->GetCurrentFrame();
uint64_t last_completed_frame = context_->GetLastCompletedFrame();
uint64_t completed_fence_value = context_->GetSwapCompletedFenceValue();
uint64_t current_fence_value = context_->GetSwapCurrentFenceValue();
// Remove temporary buffers for completed texture uploads.
auto erase_uploads_end = texture_uploads_submitted_.begin();
while (erase_uploads_end != texture_uploads_submitted_.end()) {
uint64_t upload_frame = erase_uploads_end->frame;
if (upload_frame > last_completed_frame) {
uint64_t upload_fence_value = erase_uploads_end->fence_value;
if (upload_fence_value > completed_fence_value) {
++erase_uploads_end;
break;
}
@ -456,13 +456,13 @@ void D3D12ImmediateDrawer::Begin(int render_target_width,
&location_source, nullptr);
SubmittedTextureUpload submitted_upload;
submitted_upload.buffer = pending_upload.buffer;
submitted_upload.frame = current_frame;
submitted_upload.fence_value = current_fence_value;
texture_uploads_submitted_.push_back(submitted_upload);
texture_uploads_pending_.pop_back();
}
vertex_buffer_pool_->Reclaim(last_completed_frame);
texture_descriptor_pool_->Reclaim(last_completed_frame);
vertex_buffer_pool_->Reclaim(completed_fence_value);
texture_descriptor_pool_->Reclaim(completed_fence_value);
texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid;
current_render_target_width_ = render_target_width;
@ -493,6 +493,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
if (current_command_list_ == nullptr) {
return;
}
uint64_t current_fence_value = context_->GetSwapCurrentFenceValue();
batch_open_ = false;
@ -502,8 +503,8 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
vertex_buffer_view.SizeInBytes =
batch.vertex_count * uint32_t(sizeof(ImmediateVertex));
void* vertex_buffer_mapping = vertex_buffer_pool_->Request(
context_->GetCurrentFrame(), vertex_buffer_view.SizeInBytes, nullptr,
nullptr, &vertex_buffer_view.BufferLocation);
current_fence_value, vertex_buffer_view.SizeInBytes, nullptr, nullptr,
&vertex_buffer_view.BufferLocation);
if (vertex_buffer_mapping == nullptr) {
XELOGE("Failed to get a buffer for %u vertices in the immediate drawer",
batch.vertex_count);
@ -520,7 +521,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t);
index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
void* index_buffer_mapping = vertex_buffer_pool_->Request(
context_->GetCurrentFrame(),
current_fence_value,
xe::align(index_buffer_view.SizeInBytes, UINT(sizeof(uint32_t))),
nullptr, nullptr, &index_buffer_view.BufferLocation);
if (index_buffer_mapping == nullptr) {
@ -563,7 +564,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
bool bind_texture = current_texture_ != texture;
uint32_t texture_descriptor_index;
uint64_t texture_heap_index = texture_descriptor_pool_->Request(
context_->GetCurrentFrame(), texture_descriptor_pool_heap_index_,
context_->GetSwapCurrentFenceValue(), texture_descriptor_pool_heap_index_,
bind_texture ? 1 : 0, 1, texture_descriptor_index);
if (texture_heap_index == DescriptorHeapPool::kHeapIndexInvalid) {
return;
@ -674,9 +675,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
void D3D12ImmediateDrawer::EndDrawBatch() { batch_open_ = false; }
void D3D12ImmediateDrawer::End() {
current_command_list_ = nullptr;
}
void D3D12ImmediateDrawer::End() { current_command_list_ = nullptr; }
} // namespace d3d12
} // namespace ui

View File

@ -87,7 +87,7 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
struct SubmittedTextureUpload {
ID3D12Resource* buffer;
uint64_t frame;
uint64_t fence_value;
};
std::deque<SubmittedTextureUpload> texture_uploads_submitted_;

View File

@ -22,9 +22,7 @@ namespace d3d12 {
UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size)
: device_(device), page_size_(page_size) {}
UploadBufferPool::~UploadBufferPool() {
ClearCache();
}
UploadBufferPool::~UploadBufferPool() { ClearCache(); }
void UploadBufferPool::Reclaim(uint64_t completed_fence_value) {
while (submitted_first_) {
@ -172,9 +170,7 @@ DescriptorHeapPool::DescriptorHeapPool(ID3D12Device* device,
uint32_t page_size)
: device_(device), type_(type), page_size_(page_size) {}
DescriptorHeapPool::~DescriptorHeapPool() {
ClearCache();
}
DescriptorHeapPool::~DescriptorHeapPool() { ClearCache(); }
void DescriptorHeapPool::Reclaim(uint64_t completed_fence_value) {
while (submitted_first_) {