[D3D12] Use deferred command list

This commit is contained in:
Triang3l 2019-01-03 15:08:49 +03:00
parent f0c662fa1e
commit 364cae6cc8
9 changed files with 169 additions and 192 deletions

View File

@ -66,24 +66,6 @@ void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
CommandProcessor::RequestFrameTrace(root_path); CommandProcessor::RequestFrameTrace(root_path);
} }
// Returns the ID3D12GraphicsCommandList held by the command list wrapper at
// index current_queue_frame_ in command_lists_.
// current_queue_frame_ == UINT_MAX is treated as "no usable command list"
// (presumably meaning no frame is currently open - confirm against
// BeginFrame/EndFrame); in that case the assertion fires and nullptr is
// returned, so callers must be prepared for a null result.
ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList()
const {
assert_true(current_queue_frame_ != UINT_MAX);
// Explicit check in addition to the assertion so that command_lists_ is never
// indexed with UINT_MAX if execution continues past assert_true.
if (current_queue_frame_ == UINT_MAX) {
return nullptr;
}
return command_lists_[current_queue_frame_]->GetCommandList();
}
// Same as GetCurrentCommandList, but returns the ID3D12GraphicsCommandList1
// interface obtained from the wrapper's GetCommandList1().
// Returns nullptr (after asserting) when current_queue_frame_ is UINT_MAX.
// NOTE(review): the caller in SetSamplePositions also compares the result
// against nullptr, which suggests GetCommandList1() itself may return null
// when the interface is unavailable - confirm in ui::d3d12::CommandList.
ID3D12GraphicsCommandList1* D3D12CommandProcessor::GetCurrentCommandList1()
const {
assert_true(current_queue_frame_ != UINT_MAX);
// Guard the array access even if execution continues past assert_true.
if (current_queue_frame_ == UINT_MAX) {
return nullptr;
}
return command_lists_[current_queue_frame_]->GetCommandList1();
}
bool D3D12CommandProcessor::IsROVUsedForEDRAM() const { bool D3D12CommandProcessor::IsROVUsedForEDRAM() const {
if (!FLAGS_d3d12_edram_rov) { if (!FLAGS_d3d12_edram_rov) {
return false; return false;
@ -144,7 +126,7 @@ void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) {
void D3D12CommandProcessor::SubmitBarriers() { void D3D12CommandProcessor::SubmitBarriers() {
UINT barrier_count = UINT(barriers_.size()); UINT barrier_count = UINT(barriers_.size());
if (barrier_count != 0) { if (barrier_count != 0) {
GetCurrentCommandList()->ResourceBarrier(barrier_count, barriers_.data()); deferred_command_list_->D3DResourceBarrier(barrier_count, barriers_.data());
barriers_.clear(); barriers_.clear();
} }
} }
@ -451,15 +433,9 @@ uint64_t D3D12CommandProcessor::RequestViewDescriptors(
} }
ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap(); ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap();
if (current_view_heap_ != heap) { if (current_view_heap_ != heap) {
// Bind the new descriptor heaps if needed.
current_view_heap_ = heap; current_view_heap_ = heap;
ID3D12DescriptorHeap* heaps[2]; deferred_command_list_->SetDescriptorHeaps(current_view_heap_,
uint32_t heap_count = 0; current_sampler_heap_);
heaps[heap_count++] = heap;
if (current_sampler_heap_ != nullptr) {
heaps[heap_count++] = current_sampler_heap_;
}
GetCurrentCommandList()->SetDescriptorHeaps(heap_count, heaps);
} }
auto provider = GetD3D12Context()->GetD3D12Provider(); auto provider = GetD3D12Context()->GetD3D12Provider();
cpu_handle_out = provider->OffsetViewDescriptor( cpu_handle_out = provider->OffsetViewDescriptor(
@ -483,15 +459,9 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors(
} }
ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap(); ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap();
if (current_sampler_heap_ != heap) { if (current_sampler_heap_ != heap) {
// Bind the new descriptor heaps if needed.
current_sampler_heap_ = heap; current_sampler_heap_ = heap;
ID3D12DescriptorHeap* heaps[2]; deferred_command_list_->SetDescriptorHeaps(current_view_heap_,
uint32_t heap_count = 0; current_sampler_heap_);
heaps[heap_count++] = heap;
if (current_view_heap_ != nullptr) {
heaps[heap_count++] = current_view_heap_;
}
GetCurrentCommandList()->SetDescriptorHeaps(heap_count, heaps);
} }
uint32_t descriptor_offset = uint32_t descriptor_offset =
descriptor_index * descriptor_index *
@ -567,8 +537,8 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
if (FLAGS_d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM()) { if (FLAGS_d3d12_ssaa_custom_sample_positions && !IsROVUsedForEDRAM()) {
auto provider = GetD3D12Context()->GetD3D12Provider(); auto provider = GetD3D12Context()->GetD3D12Provider();
auto tier = provider->GetProgrammableSamplePositionsTier(); auto tier = provider->GetProgrammableSamplePositionsTier();
auto command_list = GetCurrentCommandList1(); if (tier >= 2 &&
if (tier >= 2 && command_list != nullptr) { command_lists_[current_queue_frame_]->GetCommandList1() != nullptr) {
// Depth buffer transitions are affected by sample positions. // Depth buffer transitions are affected by sample positions.
SubmitBarriers(); SubmitBarriers();
// Standard sample positions in Direct3D 10.1, but adjusted to take the // Standard sample positions in Direct3D 10.1, but adjusted to take the
@ -606,9 +576,10 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
d3d_sample_positions[3].X = 4; d3d_sample_positions[3].X = 4;
d3d_sample_positions[3].Y = 4 - 4; d3d_sample_positions[3].Y = 4 - 4;
} }
command_list->SetSamplePositions(1, 4, d3d_sample_positions); deferred_command_list_->D3DSetSamplePositions(1, 4,
d3d_sample_positions);
} else { } else {
command_list->SetSamplePositions(0, 0, nullptr); deferred_command_list_->D3DSetSamplePositions(0, 0, nullptr);
} }
} }
} }
@ -617,7 +588,7 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) { void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
if (current_pipeline_ != pipeline) { if (current_pipeline_ != pipeline) {
GetCurrentCommandList()->SetPipelineState(pipeline); deferred_command_list_->D3DSetPipelineState(pipeline);
current_pipeline_ = pipeline; current_pipeline_ = pipeline;
} }
} }
@ -630,7 +601,7 @@ void D3D12CommandProcessor::SetExternalGraphicsPipeline(
ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor, ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor,
bool reset_stencil_ref) { bool reset_stencil_ref) {
if (current_pipeline_ != pipeline) { if (current_pipeline_ != pipeline) {
GetCurrentCommandList()->SetPipelineState(pipeline); deferred_command_list_->D3DSetPipelineState(pipeline);
current_pipeline_ = pipeline; current_pipeline_ = pipeline;
} }
current_graphics_root_signature_ = nullptr; current_graphics_root_signature_ = nullptr;
@ -680,6 +651,7 @@ bool D3D12CommandProcessor::SetupContext() {
return false; return false;
} }
} }
deferred_command_list_ = std::make_unique<DeferredCommandList>(this);
constant_buffer_pool_ = constant_buffer_pool_ =
std::make_unique<ui::d3d12::UploadBufferPool>(context, 1024 * 1024); std::make_unique<ui::d3d12::UploadBufferPool>(context, 1024 * 1024);
@ -902,6 +874,7 @@ void D3D12CommandProcessor::ShutdownContext() {
shared_memory_.reset(); shared_memory_.reset();
deferred_command_list_.reset();
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
command_lists_[i].reset(); command_lists_[i].reset();
} }
@ -959,7 +932,6 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
auto provider = GetD3D12Context()->GetD3D12Provider(); auto provider = GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
auto command_list = GetCurrentCommandList();
// Upload the new gamma ramps. // Upload the new gamma ramps.
if (dirty_gamma_ramp_normal_) { if (dirty_gamma_ramp_normal_) {
@ -985,8 +957,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
location_dest.pResource = gamma_ramp_texture_; location_dest.pResource = gamma_ramp_texture_;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = 0; location_dest.SubresourceIndex = 0;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, deferred_command_list_->CopyTexture(location_dest, location_source);
nullptr);
dirty_gamma_ramp_normal_ = false; dirty_gamma_ramp_normal_ = false;
} }
if (dirty_gamma_ramp_pwl_) { if (dirty_gamma_ramp_pwl_) {
@ -1011,8 +982,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
location_dest.pResource = gamma_ramp_texture_; location_dest.pResource = gamma_ramp_texture_;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = 1; location_dest.SubresourceIndex = 1;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, deferred_command_list_->CopyTexture(location_dest, location_source);
nullptr);
dirty_gamma_ramp_pwl_ = false; dirty_gamma_ramp_pwl_ = false;
} }
@ -1063,7 +1033,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
} }
// Draw the stretching rectangle. // Draw the stretching rectangle.
command_list->OMSetRenderTargets(1, &swap_texture_rtv_, TRUE, nullptr); deferred_command_list_->D3DOMSetRenderTargets(1, &swap_texture_rtv_, TRUE,
nullptr);
D3D12_VIEWPORT viewport; D3D12_VIEWPORT viewport;
viewport.TopLeftX = 0.0f; viewport.TopLeftX = 0.0f;
viewport.TopLeftY = 0.0f; viewport.TopLeftY = 0.0f;
@ -1071,20 +1042,21 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
viewport.Height = float(swap_texture_height); viewport.Height = float(swap_texture_height);
viewport.MinDepth = 0.0f; viewport.MinDepth = 0.0f;
viewport.MaxDepth = 0.0f; viewport.MaxDepth = 0.0f;
command_list->RSSetViewports(1, &viewport); deferred_command_list_->RSSetViewport(viewport);
D3D12_RECT scissor; D3D12_RECT scissor;
scissor.left = 0; scissor.left = 0;
scissor.top = 0; scissor.top = 0;
scissor.right = swap_texture_width; scissor.right = swap_texture_width;
scissor.bottom = swap_texture_height; scissor.bottom = swap_texture_height;
command_list->RSSetScissorRects(1, &scissor); deferred_command_list_->RSSetScissorRect(scissor);
D3D12GraphicsSystem* graphics_system = D3D12GraphicsSystem* graphics_system =
static_cast<D3D12GraphicsSystem*>(graphics_system_); static_cast<D3D12GraphicsSystem*>(graphics_system_);
D3D12_GPU_DESCRIPTOR_HANDLE gamma_ramp_gpu_handle = D3D12_GPU_DESCRIPTOR_HANDLE gamma_ramp_gpu_handle =
provider->OffsetViewDescriptor(descriptor_gpu_start, 1); provider->OffsetViewDescriptor(descriptor_gpu_start, 1);
graphics_system->StretchTextureToFrontBuffer( graphics_system->StretchTextureToFrontBuffer(
descriptor_gpu_start, &gamma_ramp_gpu_handle, descriptor_gpu_start, &gamma_ramp_gpu_handle,
use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f), command_list); use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f),
*deferred_command_list_);
// Ending the current frame's command list anyway, so no need to unbind // Ending the current frame's command list anyway, so no need to unbind
// the render targets when using ROV. // the render targets when using ROV.
@ -1219,7 +1191,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
bool new_frame = BeginFrame(); bool new_frame = BeginFrame();
auto command_list = GetCurrentCommandList();
// Set up the render targets - this may bind pipelines. // Set up the render targets - this may bind pipelines.
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) { if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
@ -1299,7 +1270,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
if (primitive_topology_ != primitive_topology) { if (primitive_topology_ != primitive_topology) {
primitive_topology_ = primitive_topology; primitive_topology_ = primitive_topology;
command_list->IASetPrimitiveTopology(primitive_topology); deferred_command_list_->D3DIASetPrimitiveTopology(primitive_topology);
} }
// Update the textures - this may bind pipelines. // Update the textures - this may bind pipelines.
@ -1317,12 +1288,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
return false; return false;
} }
if (current_pipeline_ != pipeline) { if (current_pipeline_ != pipeline) {
GetCurrentCommandList()->SetPipelineState(pipeline); deferred_command_list_->D3DSetPipelineState(pipeline);
current_pipeline_ = pipeline; current_pipeline_ = pipeline;
} }
// Update viewport, scissor, blend factor and stencil reference. // Update viewport, scissor, blend factor and stencil reference.
UpdateFixedFunctionState(command_list); UpdateFixedFunctionState();
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues( UpdateSystemConstantValues(
@ -1332,8 +1303,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
color_mask, pipeline_render_targets); color_mask, pipeline_render_targets);
// Update constant buffers, descriptors and root parameters. // Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(command_list, vertex_shader, pixel_shader, if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
root_signature)) {
return false; return false;
} }
@ -1532,13 +1502,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} else { } else {
shared_memory_->UseForReading(); shared_memory_->UseForReading();
} }
command_list->IASetIndexBuffer(&index_buffer_view); deferred_command_list_->D3DIASetIndexBuffer(&index_buffer_view);
SubmitBarriers(); SubmitBarriers();
if (adaptive_tessellation) { if (adaptive_tessellation) {
// Index buffer used for per-edge factors. // Index buffer used for per-edge factors.
command_list->DrawInstanced(index_count, 1, 0, 0); deferred_command_list_->D3DDrawInstanced(index_count, 1, 0, 0);
} else { } else {
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0); deferred_command_list_->D3DDrawIndexedInstanced(index_count, 1, 0, 0, 0);
} }
} else { } else {
// Check if need to draw using a conversion index buffer. // Check if need to draw using a conversion index buffer.
@ -1557,10 +1527,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
index_buffer_view.BufferLocation = conversion_gpu_address; index_buffer_view.BufferLocation = conversion_gpu_address;
index_buffer_view.SizeInBytes = converted_index_count * sizeof(uint16_t); index_buffer_view.SizeInBytes = converted_index_count * sizeof(uint16_t);
index_buffer_view.Format = DXGI_FORMAT_R16_UINT; index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
command_list->IASetIndexBuffer(&index_buffer_view); deferred_command_list_->D3DIASetIndexBuffer(&index_buffer_view);
command_list->DrawIndexedInstanced(converted_index_count, 1, 0, 0, 0); deferred_command_list_->D3DDrawIndexedInstanced(converted_index_count, 1,
0, 0, 0);
} else { } else {
command_list->DrawInstanced(index_count, 1, 0, 0); deferred_command_list_->D3DDrawInstanced(index_count, 1, 0, 0);
} }
} }
@ -1659,7 +1630,7 @@ bool D3D12CommandProcessor::BeginFrame() {
graphics_analysis->BeginCapture(); graphics_analysis->BeginCapture();
} }
} }
command_lists_[current_queue_frame_]->BeginRecording(); deferred_command_list_->Reset();
constant_buffer_pool_->BeginFrame(); constant_buffer_pool_->BeginFrame();
view_heap_pool_->BeginFrame(); view_heap_pool_->BeginFrame();
@ -1694,7 +1665,13 @@ bool D3D12CommandProcessor::EndFrame() {
// Submit barriers now because resources the queued barriers are for may be // Submit barriers now because resources the queued barriers are for may be
// destroyed between frames. // destroyed between frames.
SubmitBarriers(); SubmitBarriers();
command_lists_[current_queue_frame_]->Execute();
// Submit the command list.
auto current_command_list = command_lists_[current_queue_frame_].get();
current_command_list->BeginRecording();
deferred_command_list_->Execute(current_command_list->GetCommandList(),
current_command_list->GetCommandList1());
current_command_list->Execute();
if (pix_capturing_) { if (pix_capturing_) {
IDXGraphicsAnalysis* graphics_analysis = IDXGraphicsAnalysis* graphics_analysis =
@ -1716,8 +1693,7 @@ bool D3D12CommandProcessor::EndFrame() {
return true; return true;
} }
void D3D12CommandProcessor::UpdateFixedFunctionState( void D3D12CommandProcessor::UpdateFixedFunctionState() {
ID3D12GraphicsCommandList* command_list) {
auto& regs = *register_file_; auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
@ -1814,7 +1790,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth; ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth;
if (ff_viewport_update_needed_) { if (ff_viewport_update_needed_) {
ff_viewport_ = viewport; ff_viewport_ = viewport;
command_list->RSSetViewports(1, &viewport); deferred_command_list_->RSSetViewport(viewport);
ff_viewport_update_needed_ = false; ff_viewport_update_needed_ = false;
} }
@ -1845,7 +1821,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom; ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom;
if (ff_scissor_update_needed_) { if (ff_scissor_update_needed_) {
ff_scissor_ = scissor; ff_scissor_ = scissor;
command_list->RSSetScissorRects(1, &scissor); deferred_command_list_->RSSetScissorRect(scissor);
ff_scissor_update_needed_ = false; ff_scissor_update_needed_ = false;
} }
@ -1864,7 +1840,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32; ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32; ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
command_list->OMSetBlendFactor(ff_blend_factor_); deferred_command_list_->D3DOMSetBlendFactor(ff_blend_factor_);
ff_blend_factor_update_needed_ = false; ff_blend_factor_update_needed_ = false;
} }
@ -1873,7 +1849,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
if (ff_stencil_ref_update_needed_) { if (ff_stencil_ref_update_needed_) {
ff_stencil_ref_ = stencil_ref; ff_stencil_ref_ = stencil_ref;
command_list->OMSetStencilRef(stencil_ref); deferred_command_list_->D3DOMSetStencilRef(stencil_ref);
ff_stencil_ref_update_needed_ = false; ff_stencil_ref_update_needed_ = false;
} }
} }
@ -2493,8 +2469,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
} }
bool D3D12CommandProcessor::UpdateBindings( bool D3D12CommandProcessor::UpdateBindings(
ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) { ID3D12RootSignature* root_signature) {
auto provider = GetD3D12Context()->GetD3D12Provider(); auto provider = GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
auto& regs = *register_file_; auto& regs = *register_file_;
@ -2510,7 +2486,7 @@ bool D3D12CommandProcessor::UpdateBindings(
current_graphics_root_extras_); current_graphics_root_extras_);
// We don't know which root parameters are up to date anymore. // We don't know which root parameters are up to date anymore.
current_graphics_root_up_to_date_ = 0; current_graphics_root_up_to_date_ = 0;
command_list->SetGraphicsRootSignature(root_signature); deferred_command_list_->D3DSetGraphicsRootSignature(root_signature);
} }
XXH64_state_t hash_state; XXH64_state_t hash_state;
@ -2953,13 +2929,13 @@ bool D3D12CommandProcessor::UpdateBindings(
// Update the root parameters. // Update the root parameters.
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_FetchConstants))) { (1u << kRootParameter_FetchConstants))) {
command_list->SetGraphicsRootDescriptorTable(kRootParameter_FetchConstants, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_fetch_constants_); kRootParameter_FetchConstants, gpu_handle_fetch_constants_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants; current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants;
} }
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_FloatConstantsVertex))) { (1u << kRootParameter_FloatConstantsVertex))) {
command_list->SetGraphicsRootDescriptorTable( deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
kRootParameter_FloatConstantsVertex, kRootParameter_FloatConstantsVertex,
gpu_handle_float_constants_vertex_); gpu_handle_float_constants_vertex_);
current_graphics_root_up_to_date_ |= 1u current_graphics_root_up_to_date_ |= 1u
@ -2967,26 +2943,26 @@ bool D3D12CommandProcessor::UpdateBindings(
} }
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_FloatConstantsPixel))) { (1u << kRootParameter_FloatConstantsPixel))) {
command_list->SetGraphicsRootDescriptorTable( deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
kRootParameter_FloatConstantsPixel, gpu_handle_float_constants_pixel_); kRootParameter_FloatConstantsPixel, gpu_handle_float_constants_pixel_);
current_graphics_root_up_to_date_ |= 1u current_graphics_root_up_to_date_ |= 1u
<< kRootParameter_FloatConstantsPixel; << kRootParameter_FloatConstantsPixel;
} }
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_SystemConstants))) { (1u << kRootParameter_SystemConstants))) {
command_list->SetGraphicsRootDescriptorTable(kRootParameter_SystemConstants, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_system_constants_); kRootParameter_SystemConstants, gpu_handle_system_constants_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_SystemConstants; current_graphics_root_up_to_date_ |= 1u << kRootParameter_SystemConstants;
} }
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_BoolLoopConstants))) { (1u << kRootParameter_BoolLoopConstants))) {
command_list->SetGraphicsRootDescriptorTable( deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
kRootParameter_BoolLoopConstants, gpu_handle_bool_loop_constants_); kRootParameter_BoolLoopConstants, gpu_handle_bool_loop_constants_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_BoolLoopConstants; current_graphics_root_up_to_date_ |= 1u << kRootParameter_BoolLoopConstants;
} }
if (!(current_graphics_root_up_to_date_ & if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_SharedMemoryAndEDRAM))) { (1u << kRootParameter_SharedMemoryAndEDRAM))) {
command_list->SetGraphicsRootDescriptorTable( deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
kRootParameter_SharedMemoryAndEDRAM, kRootParameter_SharedMemoryAndEDRAM,
gpu_handle_shared_memory_and_edram_); gpu_handle_shared_memory_and_edram_);
current_graphics_root_up_to_date_ |= 1u current_graphics_root_up_to_date_ |= 1u
@ -2996,29 +2972,29 @@ bool D3D12CommandProcessor::UpdateBindings(
extra_index = current_graphics_root_extras_.textures_pixel; extra_index = current_graphics_root_extras_.textures_pixel;
if (extra_index != RootExtraParameterIndices::kUnavailable && if (extra_index != RootExtraParameterIndices::kUnavailable &&
!(current_graphics_root_up_to_date_ & (1u << extra_index))) { !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
command_list->SetGraphicsRootDescriptorTable(extra_index, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_textures_pixel_); extra_index, gpu_handle_textures_pixel_);
current_graphics_root_up_to_date_ |= 1u << extra_index; current_graphics_root_up_to_date_ |= 1u << extra_index;
} }
extra_index = current_graphics_root_extras_.samplers_pixel; extra_index = current_graphics_root_extras_.samplers_pixel;
if (extra_index != RootExtraParameterIndices::kUnavailable && if (extra_index != RootExtraParameterIndices::kUnavailable &&
!(current_graphics_root_up_to_date_ & (1u << extra_index))) { !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
command_list->SetGraphicsRootDescriptorTable(extra_index, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_samplers_pixel_); extra_index, gpu_handle_samplers_pixel_);
current_graphics_root_up_to_date_ |= 1u << extra_index; current_graphics_root_up_to_date_ |= 1u << extra_index;
} }
extra_index = current_graphics_root_extras_.textures_vertex; extra_index = current_graphics_root_extras_.textures_vertex;
if (extra_index != RootExtraParameterIndices::kUnavailable && if (extra_index != RootExtraParameterIndices::kUnavailable &&
!(current_graphics_root_up_to_date_ & (1u << extra_index))) { !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
command_list->SetGraphicsRootDescriptorTable(extra_index, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_textures_vertex_); extra_index, gpu_handle_textures_vertex_);
current_graphics_root_up_to_date_ |= 1u << extra_index; current_graphics_root_up_to_date_ |= 1u << extra_index;
} }
extra_index = current_graphics_root_extras_.samplers_vertex; extra_index = current_graphics_root_extras_.samplers_vertex;
if (extra_index != RootExtraParameterIndices::kUnavailable && if (extra_index != RootExtraParameterIndices::kUnavailable &&
!(current_graphics_root_up_to_date_ & (1u << extra_index))) { !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
command_list->SetGraphicsRootDescriptorTable(extra_index, deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
gpu_handle_samplers_vertex_); extra_index, gpu_handle_samplers_vertex_);
current_graphics_root_up_to_date_ |= 1u << extra_index; current_graphics_root_up_to_date_ |= 1u << extra_index;
} }

View File

@ -18,6 +18,7 @@
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/deferred_command_list.h"
#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/pipeline_cache.h"
#include "xenia/gpu/d3d12/primitive_converter.h" #include "xenia/gpu/d3d12/primitive_converter.h"
#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/render_target_cache.h"
@ -49,9 +50,10 @@ class D3D12CommandProcessor : public CommandProcessor {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get()); return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
} }
// Returns the drawing command list for the currently open frame. // Returns the deferred drawing command list for the currently open frame.
ID3D12GraphicsCommandList* GetCurrentCommandList() const; DeferredCommandList* GetDeferredCommandList() {
ID3D12GraphicsCommandList1* GetCurrentCommandList1() const; return deferred_command_list_.get();
}
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion // Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render // and blending performed in pixel shaders be used instead of host render
@ -200,13 +202,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Returns true if an open frame was ended. // Returns true if an open frame was ended.
bool EndFrame(); bool EndFrame();
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list); void UpdateFixedFunctionState();
void UpdateSystemConstantValues( void UpdateSystemConstantValues(
bool shared_memory_is_uav, PrimitiveType primitive_type, bool shared_memory_is_uav, PrimitiveType primitive_type,
Endian index_endian, uint32_t edge_factor_base, uint32_t color_mask, Endian index_endian, uint32_t edge_factor_base, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]); const RenderTargetCache::PipelineRenderTarget render_targets[4]);
bool UpdateBindings(ID3D12GraphicsCommandList* command_list, bool UpdateBindings(const D3D12Shader* vertex_shader,
const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader, const D3D12Shader* pixel_shader,
ID3D12RootSignature* root_signature); ID3D12RootSignature* root_signature);
@ -221,6 +222,7 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<ui::d3d12::CommandList> std::unique_ptr<ui::d3d12::CommandList>
command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {}; command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;
std::unique_ptr<SharedMemory> shared_memory_ = nullptr; std::unique_ptr<SharedMemory> shared_memory_ = nullptr;

View File

@ -210,6 +210,25 @@ void D3D12GraphicsSystem::StretchTextureToFrontBuffer(
command_list->DrawInstanced(3, 1, 0, 0); command_list->DrawInstanced(3, 1, 0, 0);
} }
// Overload of StretchTextureToFrontBuffer that writes the stretch draw into a
// DeferredCommandList instead of calling an ID3D12GraphicsCommandList
// directly.
//   handle              - GPU descriptor handle bound as descriptor table 0.
//   gamma_ramp_handle   - optional; when non-null, the gamma pipeline and root
//                         signature are used and *gamma_ramp_handle is bound
//                         as descriptor table 1.
//   gamma_ramp_inv_size - passed as a single 32-bit root constant at root
//                         parameter 2; only used when gamma_ramp_handle is
//                         non-null.
//   command_list        - deferred command list receiving the commands.
void D3D12GraphicsSystem::StretchTextureToFrontBuffer(
D3D12_GPU_DESCRIPTOR_HANDLE handle,
D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size,
DeferredCommandList& command_list) {
if (gamma_ramp_handle != nullptr) {
// Gamma-corrected path: dedicated pipeline and root signature.
command_list.D3DSetPipelineState(stretch_gamma_pipeline_);
command_list.D3DSetGraphicsRootSignature(stretch_gamma_root_signature_);
command_list.D3DSetGraphicsRootDescriptorTable(1, *gamma_ramp_handle);
// NOTE(review): &gamma_ramp_inv_size points at a by-value parameter that
// stops existing when this function returns, while the deferred list is
// replayed later. This is only safe if D3DSetGraphicsRoot32BitConstants
// copies the constant data while recording - confirm in DeferredCommandList.
// The arguments presumably mirror ID3D12GraphicsCommandList::
// SetGraphicsRoot32BitConstants (root parameter index, value count, source
// data, destination offset) - confirm.
command_list.D3DSetGraphicsRoot32BitConstants(2, 1, &gamma_ramp_inv_size,
0);
} else {
// Plain stretch without a gamma ramp.
command_list.D3DSetPipelineState(stretch_pipeline_);
command_list.D3DSetGraphicsRootSignature(stretch_root_signature_);
}
// Common to both paths: bind the source descriptor table and draw three
// vertices as a triangle list, one instance.
command_list.D3DSetGraphicsRootDescriptorTable(0, handle);
command_list.D3DIASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list.D3DDrawInstanced(3, 1, 0, 0);
}
std::unique_ptr<CommandProcessor> std::unique_ptr<CommandProcessor>
D3D12GraphicsSystem::CreateCommandProcessor() { D3D12GraphicsSystem::CreateCommandProcessor() {
return std::unique_ptr<CommandProcessor>( return std::unique_ptr<CommandProcessor>(

View File

@ -13,6 +13,7 @@
#include <memory> #include <memory>
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/deferred_command_list.h"
#include "xenia/gpu/graphics_system.h" #include "xenia/gpu/graphics_system.h"
#include "xenia/ui/d3d12/d3d12_context.h" #include "xenia/ui/d3d12/d3d12_context.h"
@ -42,6 +43,10 @@ class D3D12GraphicsSystem : public GraphicsSystem {
D3D12_GPU_DESCRIPTOR_HANDLE handle, D3D12_GPU_DESCRIPTOR_HANDLE handle,
D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size, D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size,
ID3D12GraphicsCommandList* command_list); ID3D12GraphicsCommandList* command_list);
void StretchTextureToFrontBuffer(
D3D12_GPU_DESCRIPTOR_HANDLE handle,
D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size,
DeferredCommandList& command_list);
private: private:
std::unique_ptr<CommandProcessor> CreateCommandProcessor() override; std::unique_ptr<CommandProcessor> CreateCommandProcessor() override;

View File

@ -66,6 +66,13 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
args.start_index_location, args.base_vertex_location, args.start_index_location, args.base_vertex_location,
args.start_instance_location); args.start_instance_location);
} break; } break;
case Command::kD3DDrawInstanced: {
auto& args =
*reinterpret_cast<const D3DDrawInstancedArguments*>(stream);
command_list->DrawInstanced(
args.vertex_count_per_instance, args.instance_count,
args.start_vertex_location, args.start_instance_location);
} break;
case Command::kD3DIASetIndexBuffer: { case Command::kD3DIASetIndexBuffer: {
auto view = reinterpret_cast<const D3D12_INDEX_BUFFER_VIEW*>(stream); auto view = reinterpret_cast<const D3D12_INDEX_BUFFER_VIEW*>(stream);
command_list->IASetIndexBuffer( command_list->IASetIndexBuffer(
@ -188,7 +195,6 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
stream += header[1]; stream += header[1];
stream_remaining -= header[1]; stream_remaining -= header[1];
} }
Reset();
} }
void* DeferredCommandList::WriteCommand(Command command, void* DeferredCommandList::WriteCommand(Command command,

View File

@ -127,7 +127,7 @@ void PrimitiveConverter::BeginFrame() {
auto context = command_processor_->GetD3D12Context(); auto context = command_processor_->GetD3D12Context();
if (static_ib_upload_frame_ == UINT64_MAX) { if (static_ib_upload_frame_ == UINT64_MAX) {
// Not uploaded yet - upload. // Not uploaded yet - upload.
command_processor_->GetCurrentCommandList()->CopyResource( command_processor_->GetDeferredCommandList()->D3DCopyResource(
static_ib_, static_ib_upload_); static_ib_, static_ib_upload_);
command_processor_->PushTransitionBarrier( command_processor_->PushTransitionBarrier(
static_ib_, D3D12_RESOURCE_STATE_COPY_DEST, static_ib_, D3D12_RESOURCE_STATE_COPY_DEST,

View File

@ -466,10 +466,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// - New render target is added, but doesn't overlap unsaved data from other // - New render target is added, but doesn't overlap unsaved data from other
// currently or previously used render targets, and it doesn't require a // currently or previously used render targets, and it doesn't require a
// bigger size. // bigger size.
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
auto& regs = *register_file_; auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
@ -856,7 +853,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN; current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
} }
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle); command_list->D3DOMSetRenderTargets(rtv_count, rtv_handles, FALSE,
dsv_handle);
} }
// Update the dirty regions. // Update the dirty regions.
@ -1054,10 +1052,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
return false; return false;
} }
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
// Get the destination region and clamp the source region to it. // Get the destination region and clamp the source region to it.
uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
@ -1251,7 +1246,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
// Dispatch the computation. // Dispatch the computation.
command_list->SetComputeRootSignature(edram_load_store_root_signature_); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
EDRAMLoadStoreRootConstants root_constants; EDRAMLoadStoreRootConstants root_constants;
// Only 5 bits - assuming pre-offset address. // Only 5 bits - assuming pre-offset address.
assert_true(dest_offset_x <= 31 && dest_offset_y <= 31); assert_true(dest_offset_x <= 31 && dest_offset_y <= 31);
@ -1301,9 +1296,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
root_constants.base_samples_2x_depth_pitch |= 1 << 12; root_constants.base_samples_2x_depth_pitch |= 1 << 12;
} }
} }
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_processor_->SetComputePipeline( command_processor_->SetComputePipeline(
src_64bpp ? edram_tile_sample_64bpp_pipeline_ src_64bpp ? edram_tile_sample_64bpp_pipeline_
: edram_tile_sample_32bpp_pipeline_); : edram_tile_sample_32bpp_pipeline_);
@ -1319,7 +1314,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// is 80x16 destination pixels after applying the resolution scale. // is 80x16 destination pixels after applying the resolution scale.
group_count_x <<= resolution_scale_log2; group_count_x <<= resolution_scale_log2;
group_count_y <<= resolution_scale_log2; group_count_y <<= resolution_scale_log2;
command_list->Dispatch(group_count_x, group_count_y, 1); command_list->D3DDispatch(group_count_x, group_count_y, 1);
// Commit the write. // Commit the write.
command_processor_->PushUAVBarrier( command_processor_->PushUAVBarrier(
@ -1407,7 +1402,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_list->SetComputeRootSignature(edram_load_store_root_signature_); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
EDRAMLoadStoreRootConstants load_root_constants; EDRAMLoadStoreRootConstants load_root_constants;
load_root_constants.rt_color_depth_offset = uint32_t(footprint.Offset); load_root_constants.rt_color_depth_offset = uint32_t(footprint.Offset);
@ -1422,7 +1417,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
load_root_constants.base_samples_2x_depth_pitch |= 1 << 12; load_root_constants.base_samples_2x_depth_pitch |= 1 << 12;
} }
} }
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants, 0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants,
0); 0);
@ -1431,14 +1426,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
ui::d3d12::util::CreateRawBufferUAV( ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1), device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, render_target->copy_buffer_size); copy_buffer, render_target->copy_buffer_size);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
EDRAMLoadStoreMode mode = GetLoadStoreMode(false, src_format); EDRAMLoadStoreMode mode = GetLoadStoreMode(false, src_format);
command_processor_->SetComputePipeline( command_processor_->SetComputePipeline(
resolution_scale_2x_ ? edram_load_2x_resolve_pipelines_[size_t(mode)] resolution_scale_2x_ ? edram_load_2x_resolve_pipelines_[size_t(mode)]
: edram_load_pipelines_[size_t(mode)]); : edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples, with both 1x and 2x resolution scales. // 1 group per 80x16 samples, with both 1x and 2x resolution scales.
command_list->Dispatch(row_width_ss_div_80, rows, 1); command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
command_processor_->PushUAVBarrier(copy_buffer); command_processor_->PushUAVBarrier(copy_buffer);
// Go to the next descriptor set. // Go to the next descriptor set.
@ -1468,8 +1463,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
location_dest.pResource = render_target->resource; location_dest.pResource = render_target->resource;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = 0; location_dest.SubresourceIndex = 0;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
// Do the resolve. Render targets unbound already, safe to call // Do the resolve. Render targets unbound already, safe to call
// OMSetRenderTargets. // OMSetRenderTargets.
@ -1486,7 +1480,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
D3D12_RESOURCE_STATE_RENDER_TARGET); D3D12_RESOURCE_STATE_RENDER_TARGET);
resolve_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET; resolve_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
command_list->SetGraphicsRootSignature(resolve_root_signature_); command_list->D3DSetGraphicsRootSignature(resolve_root_signature_);
ResolveRootConstants resolve_root_constants; ResolveRootConstants resolve_root_constants;
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0; uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
@ -1542,7 +1536,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
break; break;
} }
} }
command_list->SetGraphicsRoot32BitConstants( command_list->D3DSetGraphicsRoot32BitConstants(
0, sizeof(resolve_root_constants) / sizeof(uint32_t), 0, sizeof(resolve_root_constants) / sizeof(uint32_t),
&resolve_root_constants, 0); &resolve_root_constants, 0);
@ -1583,13 +1577,13 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
rt_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; rt_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(render_target->resource, &rt_srv_desc, device->CreateShaderResourceView(render_target->resource, &rt_srv_desc,
descriptor_cpu_start); descriptor_cpu_start);
command_list->SetGraphicsRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_gpu_start);
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_processor_->SetSamplePositions(MsaaSamples::k1X); command_processor_->SetSamplePositions(MsaaSamples::k1X);
command_processor_->SetExternalGraphicsPipeline(resolve_pipeline); command_processor_->SetExternalGraphicsPipeline(resolve_pipeline);
command_list->OMSetRenderTargets(1, &resolve_target->rtv_handle, TRUE, command_list->D3DOMSetRenderTargets(1, &resolve_target->rtv_handle, TRUE,
nullptr); nullptr);
D3D12_VIEWPORT viewport; D3D12_VIEWPORT viewport;
viewport.TopLeftX = 0.0f; viewport.TopLeftX = 0.0f;
viewport.TopLeftY = 0.0f; viewport.TopLeftY = 0.0f;
@ -1597,19 +1591,20 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
viewport.Height = float(copy_height << resolution_scale_log2); viewport.Height = float(copy_height << resolution_scale_log2);
viewport.MinDepth = 0.0f; viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f; viewport.MaxDepth = 1.0f;
command_list->RSSetViewports(1, &viewport); command_list->RSSetViewport(viewport);
D3D12_RECT scissor; D3D12_RECT scissor;
scissor.left = 0; scissor.left = 0;
scissor.top = 0; scissor.top = 0;
scissor.right = copy_width << resolution_scale_log2; scissor.right = copy_width << resolution_scale_log2;
scissor.bottom = copy_height << resolution_scale_log2; scissor.bottom = copy_height << resolution_scale_log2;
command_list->RSSetScissorRects(1, &scissor); command_list->RSSetScissorRect(scissor);
command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); command_list->D3DIASetPrimitiveTopology(
command_list->DrawInstanced(3, 1, 0, 0); D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
command_list->D3DDrawInstanced(3, 1, 0, 0);
if (command_processor_->IsROVUsedForEDRAM()) { if (command_processor_->IsROVUsedForEDRAM()) {
// Clean up - the ROV path doesn't need render targets bound and has // Clean up - the ROV path doesn't need render targets bound and has
// non-zero ForcedSampleCount. // non-zero ForcedSampleCount.
command_list->OMSetRenderTargets(0, nullptr, FALSE, nullptr); command_list->D3DOMSetRenderTargets(0, nullptr, FALSE, nullptr);
} }
// Copy the resolve target to the buffer. // Copy the resolve target to the buffer.
@ -1628,8 +1623,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
location_dest.pResource = copy_buffer; location_dest.pResource = copy_buffer;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
location_dest.PlacedFootprint = resolve_target->footprint; location_dest.PlacedFootprint = resolve_target->footprint;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
// Tile the resolved texture. The texture cache expects the buffer to be a // Tile the resolved texture. The texture cache expects the buffer to be a
// non-pixel-shader SRV. // non-pixel-shader SRV.
@ -1681,10 +1675,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0; uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
// Get everything needed for clearing. // Get everything needed for clearing.
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
auto device = auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
@ -1738,14 +1729,14 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
root_constants.clear_color_high = regs[reg].u32; root_constants.clear_color_high = regs[reg].u32;
command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_); command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_);
} }
command_list->SetComputeRootSignature(edram_clear_root_signature_); command_list->D3DSetComputeRootSignature(edram_clear_root_signature_);
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_cpu_start, ui::d3d12::util::CreateRawBufferUAV(device, descriptor_cpu_start,
edram_buffer_, GetEDRAMBufferSize()); edram_buffer_, GetEDRAMBufferSize());
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
// 1 group per 80x16 samples. Resolution scale handled in the shader itself. // 1 group per 80x16 samples. Resolution scale handled in the shader itself.
command_list->Dispatch(row_width_ss_div_80, rows, 1); command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
command_processor_->PushUAVBarrier(edram_buffer_); command_processor_->PushUAVBarrier(edram_buffer_);
return true; return true;
@ -2350,10 +2341,7 @@ RenderTargetCache::EDRAMLoadStoreMode RenderTargetCache::GetLoadStoreMode(
} }
void RenderTargetCache::StoreRenderTargetsToEDRAM() { void RenderTargetCache::StoreRenderTargetsToEDRAM() {
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return;
}
// Extract only the render targets that need to be stored, transition them to // Extract only the render targets that need to be stored, transition them to
// copy sources and calculate copy buffer size. // copy sources and calculate copy buffer size.
@ -2406,13 +2394,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
// Set up the bindings. // Set up the bindings.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
command_list->SetComputeRootSignature(edram_load_store_root_signature_); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, copy_buffer, ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, copy_buffer,
copy_buffer_size); copy_buffer_size);
ui::d3d12::util::CreateRawBufferUAV( ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1), device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
edram_buffer_, GetEDRAMBufferSize()); edram_buffer_, GetEDRAMBufferSize());
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Sort the bindings in ascending order of EDRAM base so data in the render // Sort the bindings in ascending order of EDRAM base so data in the render
// targets placed farther in EDRAM isn't lost in case of overlap. // targets placed farther in EDRAM isn't lost in case of overlap.
@ -2461,8 +2449,7 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
location_dest.PlacedFootprint = render_target->footprints[0]; location_dest.PlacedFootprint = render_target->footprints[0];
// TODO(Triang3l): Box for color render targets. // TODO(Triang3l): Box for color render targets.
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
EDRAMLoadStoreRootConstants root_constants; EDRAMLoadStoreRootConstants root_constants;
uint32_t rt_pitch_tiles = surface_pitch_tiles; uint32_t rt_pitch_tiles = surface_pitch_tiles;
if (!render_target->key.is_depth && if (!render_target->key.is_depth &&
@ -2481,8 +2468,7 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
root_constants.base_samples_2x_depth_pitch |= 1 << 15; root_constants.base_samples_2x_depth_pitch |= 1 << 15;
location_source.SubresourceIndex = 1; location_source.SubresourceIndex = 1;
location_dest.PlacedFootprint = render_target->footprints[1]; location_dest.PlacedFootprint = render_target->footprints[1];
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
root_constants.rt_stencil_offset = root_constants.rt_stencil_offset =
uint32_t(location_dest.PlacedFootprint.Offset); uint32_t(location_dest.PlacedFootprint.Offset);
root_constants.rt_stencil_pitch = root_constants.rt_stencil_pitch =
@ -2497,14 +2483,14 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
// Store the data. // Store the data.
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_->SetComputePipeline( command_processor_->SetComputePipeline(
edram_store_pipelines_[size_t(mode)]); edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list->Dispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); command_list->D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
// Commit the UAV write. // Commit the UAV write.
command_processor_->PushUAVBarrier(edram_buffer_); command_processor_->PushUAVBarrier(edram_buffer_);
@ -2521,10 +2507,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
return; return;
} }
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return;
}
// Allocate descriptors for the buffers. // Allocate descriptors for the buffers.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
@ -2562,13 +2545,13 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
// Set up the bindings. // Set up the bindings.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
command_list->SetComputeRootSignature(edram_load_store_root_signature_); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start,
edram_buffer_, GetEDRAMBufferSize()); edram_buffer_, GetEDRAMBufferSize());
ui::d3d12::util::CreateRawBufferUAV( ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1), device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, copy_buffer_size); copy_buffer, copy_buffer_size);
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Load each render target. // Load each render target.
for (uint32_t i = 0; i < render_target_count; ++i) { for (uint32_t i = 0; i < render_target_count; ++i) {
@ -2615,13 +2598,14 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
root_constants.rt_stencil_pitch = root_constants.rt_stencil_pitch =
render_target->footprints[1].Footprint.RowPitch; render_target->footprints[1].Footprint.RowPitch;
} }
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EDRAMLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_->SetComputePipeline(edram_load_pipelines_[size_t(mode)]); command_processor_->SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list->Dispatch(render_target->key.width_ss_div_80, edram_rows, 1); command_list->D3DDispatch(render_target->key.width_ss_div_80, edram_rows,
1);
// Commit the UAV write and transition the copy buffer to copy source now. // Commit the UAV write and transition the copy buffer to copy source now.
command_processor_->PushUAVBarrier(copy_buffer); command_processor_->PushUAVBarrier(copy_buffer);
@ -2638,13 +2622,11 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
location_dest.pResource = render_target->resource; location_dest.pResource = render_target->resource;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = 0; location_dest.SubresourceIndex = 0;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
if (render_target->key.is_depth) { if (render_target->key.is_depth) {
location_source.PlacedFootprint = render_target->footprints[1]; location_source.PlacedFootprint = render_target->footprints[1];
location_dest.SubresourceIndex = 1; location_dest.SubresourceIndex = 1;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
} }
} }

View File

@ -319,10 +319,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
} }
uint32_t last = start + length - 1; uint32_t last = start + length - 1;
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
@ -360,7 +357,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
upload_buffer_mapping, upload_buffer_mapping,
memory_->TranslatePhysical(upload_range_start << page_size_log2_), memory_->TranslatePhysical(upload_range_start << page_size_log2_),
upload_buffer_size); upload_buffer_size);
command_list->CopyBufferRegion( command_list->D3DCopyBufferRegion(
buffer_, upload_range_start << page_size_log2_, upload_buffer, buffer_, upload_range_start << page_size_log2_, upload_buffer,
upload_buffer_offset, upload_buffer_size); upload_buffer_offset, upload_buffer_size);
upload_range_start += upload_buffer_pages; upload_range_start += upload_buffer_pages;

View File

@ -644,10 +644,6 @@ void TextureCache::EndFrame() {
void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
uint32_t used_pixel_texture_mask) { uint32_t used_pixel_texture_mask) {
auto command_list = command_processor_->GetCurrentCommandList();
if (command_list == nullptr) {
return;
}
auto& regs = *register_file_; auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
@ -1061,10 +1057,7 @@ bool TextureCache::TileResolvedTexture(
const ResolveTileModeInfo& resolve_tile_mode_info = const ResolveTileModeInfo& resolve_tile_mode_info =
resolve_tile_mode_info_[uint32_t(resolve_tile_mode)]; resolve_tile_mode_info_[uint32_t(resolve_tile_mode)];
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
uint32_t resolution_scale_log2 = IsResolutionScale2X() ? 1 : 0; uint32_t resolution_scale_log2 = IsResolutionScale2X() ? 1 : 0;
@ -1122,7 +1115,7 @@ bool TextureCache::TileResolvedTexture(
shared_memory_->UseForWriting(); shared_memory_->UseForWriting();
} }
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_list->SetComputeRootSignature(resolve_tile_root_signature_); command_list->D3DSetComputeRootSignature(resolve_tile_root_signature_);
ResolveTileConstants resolve_tile_constants; ResolveTileConstants resolve_tile_constants;
resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) | resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) |
(resolution_scale_log2 << 9) | (resolution_scale_log2 << 9) |
@ -1170,17 +1163,17 @@ bool TextureCache::TileResolvedTexture(
shared_memory_->CreateRawUAV(descriptor_cpu_uav); shared_memory_->CreateRawUAV(descriptor_cpu_uav);
} }
} }
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->SetComputeRoot32BitConstants( command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(resolve_tile_constants) / sizeof(uint32_t), 0, sizeof(resolve_tile_constants) / sizeof(uint32_t),
&resolve_tile_constants, 0); &resolve_tile_constants, 0);
command_processor_->SetComputePipeline( command_processor_->SetComputePipeline(
resolve_tile_pipelines_[uint32_t(resolve_tile_mode)]); resolve_tile_pipelines_[uint32_t(resolve_tile_mode)]);
// Each group processes 32x32 texels after resolution scaling has been // Each group processes 32x32 texels after resolution scaling has been
// applied. // applied.
command_list->Dispatch(((resolve_width << resolution_scale_log2) + 31) >> 5, command_list->D3DDispatch(
((resolve_height << resolution_scale_log2) + 31) >> 5, ((resolve_width << resolution_scale_log2) + 31) >> 5,
1); ((resolve_height << resolution_scale_log2) + 31) >> 5, 1);
// Commit the write. // Commit the write.
command_processor_->PushUAVBarrier(resolution_scale_log2 command_processor_->PushUAVBarrier(resolution_scale_log2
@ -1687,10 +1680,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return true; return true;
} }
auto command_list = command_processor_->GetCurrentCommandList(); auto command_list = command_processor_->GetDeferredCommandList();
if (command_list == nullptr) {
return false;
}
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice(); auto device = provider->GetDevice();
@ -1818,10 +1808,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
copy_buffer, uint32_t(host_slice_size)); copy_buffer, uint32_t(host_slice_size));
} }
command_processor_->SetComputePipeline(pipeline); command_processor_->SetComputePipeline(pipeline);
command_list->SetComputeRootSignature(load_root_signature_); command_list->D3DSetComputeRootSignature(load_root_signature_);
if (!separate_base_and_mips_descriptors) { if (!separate_base_and_mips_descriptors) {
// Will be bound later. // Will be bound later.
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
} }
// Submit commands. // Submit commands.
@ -1896,12 +1886,13 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false; return false;
} }
std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants)); std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants));
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address); command_list->D3DSetComputeRootConstantBufferView(0, cbuffer_gpu_address);
if (separate_base_and_mips_descriptors) { if (separate_base_and_mips_descriptors) {
if (j == 0) { if (j == 0) {
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1,
descriptor_gpu_start);
} else if (j == 1) { } else if (j == 1) {
command_list->SetComputeRootDescriptorTable( command_list->D3DSetComputeRootDescriptorTable(
1, provider->OffsetViewDescriptor(descriptor_gpu_start, 2)); 1, provider->OffsetViewDescriptor(descriptor_gpu_start, 2));
} }
} }
@ -1916,8 +1907,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
} }
group_count_x = (group_count_x + 31) >> 5; group_count_x = (group_count_x + 31) >> 5;
group_count_y = (group_count_y + 31) >> 5; group_count_y = (group_count_y + 31) >> 5;
command_list->Dispatch(group_count_x, group_count_y, command_list->D3DDispatch(group_count_x, group_count_y,
load_constants.size_blocks[2]); load_constants.size_blocks[2]);
} }
command_processor_->PushUAVBarrier(copy_buffer); command_processor_->PushUAVBarrier(copy_buffer);
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
@ -1933,8 +1924,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
location_dest.pResource = texture->resource; location_dest.pResource = texture->resource;
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
location_dest.SubresourceIndex = slice_first_subresource + j; location_dest.SubresourceIndex = slice_first_subresource + j;
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTexture(location_dest, location_source);
nullptr);
} }
} }