diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 210ac4fc3..84c7d2395 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -9,8 +9,11 @@ #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include + #include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/xenos.h" @@ -29,6 +32,303 @@ void D3D12CommandProcessor::ClearCaches() { cache_clear_requested_ = true; } +ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( + const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { + assert_true(vertex_shader->is_translated()); + assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); + + uint32_t pixel_textures = + pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0; + uint32_t pixel_samplers = + pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0; + uint32_t vertex_textures = vertex_shader->GetTextureSRVCount(); + uint32_t vertex_samplers = vertex_shader->GetSamplerCount(); + // Max 96 textures (if all kinds of tfetch instructions are used for all fetch + // registers) and 32 samplers (one sampler per used fetch), but different + // shader stages have different texture sets. + uint32_t index = pixel_textures | (pixel_samplers << 7) | + (vertex_textures << 12) | (vertex_samplers << 19); + + // Try an existing root signature. + auto it = root_signatures_.find(index); + if (it != root_signatures_.end()) { + return it->second; + } + + // Create a new one. 
+  D3D12_ROOT_SIGNATURE_DESC desc;
+  D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_TwoStageTextures];
+  D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_TwoStageTextures];
+  desc.NumParameters = kRootParameter_Count_NoTextures;
+  desc.pParameters = parameters;
+  desc.NumStaticSamplers = 0;
+  desc.pStaticSamplers = nullptr;
+  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+
+  // Fetch constants.
+  {
+    auto& parameter = parameters[kRootParameter_FetchConstants];
+    auto& range = ranges[kRootParameter_FetchConstants];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 1;
+    range.BaseShaderRegister = 10;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Vertex float constants.
+  {
+    auto& parameter = parameters[kRootParameter_VertexFloatConstants];
+    auto& range = ranges[kRootParameter_VertexFloatConstants];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 8;
+    range.BaseShaderRegister = 2;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Pixel float constants.
+  {
+    auto& parameter = parameters[kRootParameter_PixelFloatConstants];
+    auto& range = ranges[kRootParameter_PixelFloatConstants];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 8;
+    range.BaseShaderRegister = 2;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Common constants - system and loop/bool.
+  {
+    auto& parameter = parameters[kRootParameter_CommonConstants];
+    auto& range = ranges[kRootParameter_CommonConstants];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 2;
+    range.BaseShaderRegister = 0;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Shared memory.
+  {
+    auto& parameter = parameters[kRootParameter_SharedMemory];
+    auto& range = ranges[kRootParameter_SharedMemory];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+    range.NumDescriptors = 1;
+    range.BaseShaderRegister = 0;
+    range.RegisterSpace = 1;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  if (pixel_textures > 0 || vertex_textures > 0) {
+    desc.NumParameters = kRootParameter_Count_OneStageTextures;
+
+    // Pixel or vertex textures.
+    {
+      auto& parameter = parameters[kRootParameter_PixelOrVertexTextures];
+      auto& range = ranges[kRootParameter_PixelOrVertexTextures];
+      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+      parameter.DescriptorTable.NumDescriptorRanges = 1;
+      parameter.DescriptorTable.pDescriptorRanges = &range;
+      range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+      range.BaseShaderRegister = 0;
+      range.RegisterSpace = 0;
+      range.OffsetInDescriptorsFromTableStart = 0;
+      if (pixel_textures > 0) {
+        assert_true(pixel_samplers > 0);
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
+        range.NumDescriptors = pixel_textures;
+      } else {
+        assert_true(vertex_samplers > 0);
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+        range.NumDescriptors = vertex_textures;
+      }
+    }
+
+    // Pixel or vertex samplers.
+    {
+      auto& parameter = parameters[kRootParameter_PixelOrVertexSamplers];
+      auto& range = ranges[kRootParameter_PixelOrVertexSamplers];
+      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+      parameter.DescriptorTable.NumDescriptorRanges = 1;
+      parameter.DescriptorTable.pDescriptorRanges = &range;
+      range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
+      range.BaseShaderRegister = 0;
+      range.RegisterSpace = 0;
+      range.OffsetInDescriptorsFromTableStart = 0;
+      if (pixel_samplers > 0) {
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
+        range.NumDescriptors = pixel_samplers;
+      } else {
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+        range.NumDescriptors = vertex_samplers;
+      }
+    }
+
+    if (pixel_textures > 0 && vertex_textures > 0) {
+      assert_true(vertex_samplers > 0);
+
+      desc.NumParameters = UINT(kRootParameter_Count_TwoStageTextures);
+
+      // Vertex textures.
+      {
+        auto& parameter = parameters[kRootParameter_VertexTextures];
+        auto& range = ranges[kRootParameter_VertexTextures];
+        parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+        parameter.DescriptorTable.NumDescriptorRanges = 1;
+        parameter.DescriptorTable.pDescriptorRanges = &range;
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+        range.NumDescriptors = vertex_textures;
+        range.BaseShaderRegister = 0;
+        range.RegisterSpace = 0;
+        range.OffsetInDescriptorsFromTableStart = 0;
+      }
+
+      // Vertex samplers.
+      {
+        auto& parameter = parameters[kRootParameter_VertexSamplers];
+        auto& range = ranges[kRootParameter_VertexSamplers];
+        parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+        parameter.DescriptorTable.NumDescriptorRanges = 1;
+        parameter.DescriptorTable.pDescriptorRanges = &range;
+        parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
+        range.NumDescriptors = vertex_samplers;
+        range.BaseShaderRegister = 0;
+        range.RegisterSpace = 0;
+        range.OffsetInDescriptorsFromTableStart = 0;
+      }
+    }
+  }
+
+  ID3DBlob* blob;
+  ID3DBlob* error_blob = nullptr;
+  if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
+                                         &blob, &error_blob))) {
+    XELOGE(
+        "Failed to serialize a root signature with %u pixel textures, %u "
+        "pixel samplers, %u vertex textures and %u vertex samplers",
+        pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
+    if (error_blob != nullptr) {
+      XELOGE("%s",
+             reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
+      error_blob->Release();
+    }
+    return nullptr;
+  }
+  if (error_blob != nullptr) {
+    error_blob->Release();
+  }
+
+  auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
+  ID3D12RootSignature* root_signature;
+  if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(),
+                                         blob->GetBufferSize(),
+                                         IID_PPV_ARGS(&root_signature)))) {
+    XELOGE(
+
"Failed to create a root signature with %u pixel textures, %u pixel " + "samplers, %u vertex textures and %u vertex samplers", + pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); + blob->Release(); + return nullptr; + } + blob->Release(); + + root_signatures_.insert({index, root_signature}); + return root_signature; +} + +uint64_t D3D12CommandProcessor::RequestViewDescriptors( + uint64_t previous_full_update, uint32_t count_for_partial_update, + uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { + uint32_t descriptor_index; + uint64_t current_full_update = + view_heap_pool_->Request(previous_full_update, count_for_partial_update, + count_for_full_update, descriptor_index); + if (current_full_update == 0) { + // There was an error. + return 0; + } + ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap(); + if (current_view_heap_ != heap) { + // Bind the new descriptor heaps if needed. + current_view_heap_ = heap; + ID3D12DescriptorHeap* heaps[2]; + uint32_t heap_count = 0; + heaps[heap_count++] = heap; + if (current_sampler_heap_ != nullptr) { + heaps[heap_count++] = current_sampler_heap_; + } + command_lists_[current_queue_frame_]->GetCommandList()->SetDescriptorHeaps( + heap_count, heaps); + } + uint32_t descriptor_offset = + descriptor_index * + GetD3D12Context()->GetD3D12Provider()->GetDescriptorSizeView(); + cpu_handle_out.ptr = + view_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; + gpu_handle_out.ptr = + view_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; + return current_full_update; +} + +uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( + uint64_t previous_full_update, uint32_t count_for_partial_update, + uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { + uint32_t descriptor_index; + uint64_t current_full_update = 
sampler_heap_pool_->Request( + previous_full_update, count_for_partial_update, count_for_full_update, + descriptor_index); + if (current_full_update == 0) { + // There was an error. + return 0; + } + ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap(); + if (current_sampler_heap_ != heap) { + // Bind the new descriptor heaps if needed. + current_sampler_heap_ = heap; + ID3D12DescriptorHeap* heaps[2]; + uint32_t heap_count = 0; + heaps[heap_count++] = heap; + if (current_view_heap_ != nullptr) { + heaps[heap_count++] = current_view_heap_; + } + command_lists_[current_queue_frame_]->GetCommandList()->SetDescriptorHeaps( + heap_count, heaps); + } + uint32_t descriptor_offset = + descriptor_index * + GetD3D12Context()->GetD3D12Provider()->GetDescriptorSizeSampler(); + cpu_handle_out.ptr = + view_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; + gpu_handle_out.ptr = + view_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; + return current_full_update; +} + bool D3D12CommandProcessor::SetupContext() { if (!CommandProcessor::SetupContext()) { XELOGE("Failed to initialize base command processor context"); @@ -51,13 +351,22 @@ bool D3D12CommandProcessor::SetupContext() { } } + constant_buffer_pool_ = + std::make_unique(context, 1024 * 1024); + view_heap_pool_ = std::make_unique( + context, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 32768); + // Can't create a shader-visible heap with more than 2048 samplers. 
+ sampler_heap_pool_ = std::make_unique( + context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048); + shared_memory_ = std::make_unique(memory_, context); if (!shared_memory_->Initialize()) { XELOGE("Failed to initialize shared memory"); return false; } - pipeline_cache_ = std::make_unique(register_file_, context); + pipeline_cache_ = + std::make_unique(this, register_file_, context); return true; } @@ -66,7 +375,18 @@ void D3D12CommandProcessor::ShutdownContext() { auto context = GetD3D12Context(); context->AwaitAllFramesCompletion(); + sampler_heap_pool_.reset(); + view_heap_pool_.reset(); + constant_buffer_pool_.reset(); + pipeline_cache_.reset(); + + // Root signatured are used by pipelines, thus freed after the pipelines. + for (auto it : root_signatures_) { + it.second->Release(); + } + root_signatures_.clear(); + shared_memory_.reset(); for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { @@ -77,6 +397,22 @@ void D3D12CommandProcessor::ShutdownContext() { CommandProcessor::ShutdownContext(); } +void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + CommandProcessor::WriteRegister(index, value); + + if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && + index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { + uint32_t component_index = index - XE_GPU_REG_SHADER_CONSTANT_000_X; + cbuffer_bindings_float_[component_index >> 7].up_to_date = false; + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && + index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { + cbuffer_bindings_bool_loop_.up_to_date = false; + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && + index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { + cbuffer_bindings_fetch_.up_to_date = false; + } +} + void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { @@ -87,7 +423,20 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, if (cache_clear_requested_) { cache_clear_requested_ = 
false; GetD3D12Context()->AwaitAllFramesCompletion(); + + sampler_heap_pool_->ClearCache(); + view_heap_pool_->ClearCache(); + constant_buffer_pool_->ClearCache(); + pipeline_cache_->ClearCache(); + + for (auto it : root_signatures_) { + it.second->Release(); + } + root_signatures_.clear(); + + // TODO(Triang3l): Shared memory cache clear. + // shared_memory_->ClearCache(); } } @@ -102,6 +451,7 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES @@ -145,8 +495,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } - bool full_update = BeginFrame(); + bool new_frame = BeginFrame(); + ID3D12GraphicsCommandList* command_list = + command_lists_[current_queue_frame_]->GetCommandList(); + // Get the pipeline and translate the shaders so used textures are known. ID3D12PipelineState* pipeline; ID3D12RootSignature* root_signature; auto pipeline_status = pipeline_cache_->ConfigurePipeline( @@ -158,6 +511,18 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return false; } + // Bind the pipeline. + if (current_pipeline_ != pipeline) { + current_pipeline_ = pipeline; + command_list->SetPipelineState(pipeline); + } + + // Update constant buffers, descriptors and root parameters. + if (!UpdateBindings(command_list, vertex_shader, pixel_shader, + root_signature)) { + return false; + } + // Shared memory test. 
if (index_buffer_info != nullptr && index_buffer_info->guest_base != 0) { uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 @@ -181,6 +546,21 @@ bool D3D12CommandProcessor::BeginFrame() { context->BeginSwap(); current_queue_frame_ = context->GetCurrentQueueFrame(); + // Reset bindings, particularly because the buffers backing them are recycled. + current_pipeline_ = nullptr; + current_graphics_root_signature_ = nullptr; + current_graphics_root_up_to_date_ = 0; + current_view_heap_ = nullptr; + current_sampler_heap_ = nullptr; + cbuffer_bindings_system_.up_to_date = false; + for (uint32_t i = 0; i < xe::countof(cbuffer_bindings_float_); ++i) { + cbuffer_bindings_float_[i].up_to_date = false; + } + cbuffer_bindings_bool_loop_.up_to_date = false; + cbuffer_bindings_fetch_.up_to_date = false; + draw_view_full_update_ = 0; + draw_sampler_full_update_ = 0; + command_lists_setup_[current_queue_frame_]->BeginRecording(); command_lists_[current_queue_frame_]->BeginRecording(); @@ -214,6 +594,237 @@ bool D3D12CommandProcessor::EndFrame() { return true; } +bool D3D12CommandProcessor::UpdateBindings( + ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) { + auto provider = GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + auto& regs = *register_file_; + + // Bind the new root signature. + if (current_graphics_root_signature_ != root_signature) { + current_graphics_root_signature_ = root_signature; + // We don't know which root parameters are up to date anymore. + current_graphics_root_up_to_date_ = 0; + command_list->SetGraphicsRootSignature(root_signature); + } + + // Begin updating descriptors. + bool write_common_constant_views = false; + bool write_vertex_float_constant_views = false; + bool write_pixel_float_constant_views = false; + bool write_fetch_constant_view = false; + + // Update constant buffers. 
+ // TODO(Triang3l): Update the system constant buffer - will crash without it. + ID3D12Resource* constant_buffer; + uint32_t constant_buffer_offset; + if (!cbuffer_bindings_system_.up_to_date) { + uint8_t* system_constants = constant_buffer_pool_->RequestFull( + xe::align(uint32_t(sizeof(cbuffer_system_)), 256u), constant_buffer, + constant_buffer_offset); + if (system_constants == nullptr) { + return false; + } + std::memcpy(system_constants, &cbuffer_system_, sizeof(cbuffer_system_)); + cbuffer_bindings_system_.buffer_address = + constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; + cbuffer_bindings_system_.up_to_date = true; + write_common_constant_views = true; + } + if (!cbuffer_bindings_bool_loop_.up_to_date) { + uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull( + 256, constant_buffer, constant_buffer_offset); + if (bool_loop_constants == nullptr) { + return false; + } + std::memcpy(bool_loop_constants, + ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + 40 * sizeof(uint32_t)); + cbuffer_bindings_bool_loop_.buffer_address = + constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; + cbuffer_bindings_bool_loop_.up_to_date = true; + write_common_constant_views = true; + } + for (uint32_t i = 0; i < 16; ++i) { + ConstantBufferBinding& float_binding = cbuffer_bindings_float_[i]; + if (float_binding.up_to_date) { + continue; + } + uint8_t* float_constants = constant_buffer_pool_->RequestFull( + 512, constant_buffer, constant_buffer_offset); + if (float_constants == nullptr) { + return false; + } + std::memcpy(float_constants, + ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32, + 32 * 4 * sizeof(uint32_t)); + float_binding.buffer_address = + constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; + float_binding.up_to_date = true; + if (i < 8) { + write_vertex_float_constant_views = true; + } else { + write_pixel_float_constant_views = true; + } + } + if (!cbuffer_bindings_fetch_.up_to_date) { + uint8_t* 
fetch_constants = constant_buffer_pool_->RequestFull( + 768, constant_buffer, constant_buffer_offset); + if (fetch_constants == nullptr) { + return false; + } + std::memcpy(fetch_constants, + ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + 32 * 6 * sizeof(uint32_t)); + cbuffer_bindings_fetch_.buffer_address = + constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; + cbuffer_bindings_fetch_.up_to_date = true; + write_fetch_constant_view = true; + } + + // Update the descriptors. + uint32_t view_count_partial_update = 0; + if (write_common_constant_views) { + // System and bool/loop constants. + view_count_partial_update += 2; + } + if (write_vertex_float_constant_views) { + // Vertex float constants. + view_count_partial_update += 8; + } + if (write_pixel_float_constant_views) { + // Pixel float constants. + view_count_partial_update += 8; + } + if (write_fetch_constant_view) { + // Fetch constants. + ++view_count_partial_update; + } + // All the constants + shared memory. + uint32_t view_count_full_update = 20; + D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; + uint32_t view_handle_size = provider->GetDescriptorSizeView(); + uint64_t view_full_update_index = RequestViewDescriptors( + draw_view_full_update_, view_count_partial_update, view_count_full_update, + view_cpu_handle, view_gpu_handle); + if (view_full_update_index == 0) { + return false; + } + if (draw_view_full_update_ != view_full_update_index) { + // Need to update all descriptors. + draw_view_full_update_ = view_full_update_index; + write_common_constant_views = true; + write_vertex_float_constant_views = true; + write_pixel_float_constant_views = true; + write_fetch_constant_view = true; + // If updating fully, write the shared memory descriptor (t0, space1). 
+ shared_memory_->CreateSRV(view_cpu_handle); + gpu_handle_shared_memory_ = view_gpu_handle; + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SharedMemory); + } + D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_desc; + if (write_common_constant_views) { + gpu_handle_common_constants_ = view_gpu_handle; + // System constants (b0). + constant_buffer_desc.BufferLocation = + cbuffer_bindings_system_.buffer_address; + constant_buffer_desc.SizeInBytes = + xe::align(uint32_t(sizeof(cbuffer_system_)), 256u); + device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + // Bool/loop constants (b1). + constant_buffer_desc.BufferLocation = + cbuffer_bindings_bool_loop_.buffer_address; + constant_buffer_desc.SizeInBytes = 256; + device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_CommonConstants); + } + if (write_vertex_float_constant_views) { + gpu_handle_vertex_float_constants_ = view_gpu_handle; + // Vertex float constants (b2-b9). + for (uint32_t i = 0; i < 8; ++i) { + constant_buffer_desc.BufferLocation = + cbuffer_bindings_float_[i].buffer_address; + constant_buffer_desc.SizeInBytes = 512; + device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + } + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_VertexFloatConstants); + } + if (write_pixel_float_constant_views) { + gpu_handle_pixel_float_constants_ = view_gpu_handle; + // Pixel float constants (b2-b9). 
+ for (uint32_t i = 0; i < 8; ++i) { + constant_buffer_desc.BufferLocation = + cbuffer_bindings_float_[8 + i].buffer_address; + constant_buffer_desc.SizeInBytes = 512; + device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + } + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_PixelFloatConstants); + } + if (write_fetch_constant_view) { + gpu_handle_fetch_constants_ = view_gpu_handle; + // Fetch constants (b10). + constant_buffer_desc.BufferLocation = + cbuffer_bindings_fetch_.buffer_address; + constant_buffer_desc.SizeInBytes = 768; + device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants); + } + + // Update the root parameters. + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_FetchConstants))) { + command_list->SetGraphicsRootDescriptorTable(kRootParameter_FetchConstants, + gpu_handle_fetch_constants_); + current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_VertexFloatConstants))) { + command_list->SetGraphicsRootDescriptorTable( + kRootParameter_VertexFloatConstants, + gpu_handle_vertex_float_constants_); + current_graphics_root_up_to_date_ |= 1u + << kRootParameter_VertexFloatConstants; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_PixelFloatConstants))) { + command_list->SetGraphicsRootDescriptorTable( + kRootParameter_PixelFloatConstants, gpu_handle_pixel_float_constants_); + current_graphics_root_up_to_date_ |= 1u + << kRootParameter_PixelFloatConstants; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_CommonConstants))) { + command_list->SetGraphicsRootDescriptorTable(kRootParameter_CommonConstants, + 
gpu_handle_common_constants_); + current_graphics_root_up_to_date_ |= 1u << kRootParameter_CommonConstants; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_SharedMemory))) { + command_list->SetGraphicsRootDescriptorTable(kRootParameter_SharedMemory, + gpu_handle_shared_memory_); + current_graphics_root_up_to_date_ |= 1u << kRootParameter_SharedMemory; + } + + return true; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 49e7f958f..3796450c1 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -11,6 +11,7 @@ #define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_ #include +#include #include "xenia/gpu/command_processor.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h" @@ -20,6 +21,7 @@ #include "xenia/kernel/kernel_state.h" #include "xenia/ui/d3d12/command_list.h" #include "xenia/ui/d3d12/d3d12_context.h" +#include "xenia/ui/d3d12/pools.h" namespace xe { namespace gpu { @@ -38,10 +40,29 @@ class D3D12CommandProcessor : public CommandProcessor { return static_cast(context_.get()); } + // Finds or creates root signature for a pipeline. + ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader); + + // Request and automatically rebind descriptors on the draw command list. + // Refer to DescriptorHeapPool::Request for partial/full update explanation. 
+ uint64_t RequestViewDescriptors(uint64_t previous_full_update, + uint32_t count_for_partial_update, + uint32_t count_for_full_update, + D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); + uint64_t RequestSamplerDescriptors( + uint64_t previous_full_update, uint32_t count_for_partial_update, + uint32_t count_for_full_update, + D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); + protected: bool SetupContext() override; void ShutdownContext() override; + void WriteRegister(uint32_t index, uint32_t value) override; + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -54,11 +75,61 @@ class D3D12CommandProcessor : public CommandProcessor { bool IssueCopy() override; private: + enum RootParameter : UINT { + // These are always present. + + // Very frequently changed, especially for UI draws, and for models drawn in + // multiple parts - contains fetch constants with vertex addresses (b10). + kRootParameter_FetchConstants, + // Quite frequently changed (for one object drawn multiple times, for + // instance - may contain projection matrices) - 8 pages of float constants + // (b2-b9). + kRootParameter_VertexFloatConstants, + // Less frequently changed (per-material) - 8 pages of float constants + // (b2-b9). + kRootParameter_PixelFloatConstants, + // Rarely changed - system constants like viewport and alpha testing (b0) + // and loop and bool constants (b1). + kRootParameter_CommonConstants, + // Never changed - shared memory byte address buffer (t0, space1). + kRootParameter_SharedMemory, + + kRootParameter_Count_NoTextures, + + // These are there only if textures are fetched (they are changed pretty + // frequently, but for the ease of maintenance they're in the end). 
+ // If the pixel shader samples textures, these are for pixel textures + // (changed more frequently), otherwise, if the vertex shader samples + // textures, these are for vertex textures. + + // Used textures of all types (t0+, space0). + kRootParameter_PixelOrVertexTextures = kRootParameter_Count_NoTextures, + // Used samplers (s0+). + kRootParameter_PixelOrVertexSamplers, + + kRootParameter_Count_OneStageTextures, + + // These are only present if both pixel and vertex shaders sample textures + // for vertex textures. + + // Used textures of all types (t0+, space0). + kRootParameter_VertexTextures = kRootParameter_Count_OneStageTextures, + // Used samplers (s0+). + kRootParameter_VertexSamplers, + + kRootParameter_Count_TwoStageTextures, + }; + // Returns true if a new frame was started. bool BeginFrame(); // Returns true if an open frame was ended. bool EndFrame(); + bool UpdateBindings(ID3D12GraphicsCommandList* command_list, + const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader, + ID3D12RootSignature* root_signature); + bool cache_clear_requested_ = false; std::unique_ptr @@ -67,9 +138,58 @@ class D3D12CommandProcessor : public CommandProcessor { command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {}; std::unique_ptr shared_memory_ = nullptr; + + // Root signatures for different descriptor counts. + std::unordered_map root_signatures_; + std::unique_ptr pipeline_cache_ = nullptr; + std::unique_ptr constant_buffer_pool_ = nullptr; + std::unique_ptr view_heap_pool_ = nullptr; + std::unique_ptr sampler_heap_pool_ = nullptr; + uint32_t current_queue_frame_ = UINT32_MAX; + + // Currently bound graphics or compute pipeline. + ID3D12PipelineState* current_pipeline_; + // Currently bound graphics root signature. + ID3D12RootSignature* current_graphics_root_signature_; + // Whether root parameters are up to date - reset if a new signature is bound. 
+ uint32_t current_graphics_root_up_to_date_; + + // Currently bound descriptor heaps - update by RequestViewDescriptors and + // RequestSamplerDescriptors. + ID3D12DescriptorHeap* current_view_heap_; + ID3D12DescriptorHeap* current_sampler_heap_; + + // System shader constants. + struct SystemConstants { + float viewport_inv_scale_x; + float viewport_inv_scale_y; + uint32_t vertex_index_endian; + uint32_t textures_are_3d; + } cbuffer_system_; + + // Constant buffer bindings. + struct ConstantBufferBinding { + D3D12_GPU_VIRTUAL_ADDRESS buffer_address; + bool up_to_date; + }; + ConstantBufferBinding cbuffer_bindings_system_; + ConstantBufferBinding cbuffer_bindings_float_[16]; + ConstantBufferBinding cbuffer_bindings_bool_loop_; + ConstantBufferBinding cbuffer_bindings_fetch_; + + // Pages with the descriptors currently used for handling Xenos draw calls. + uint64_t draw_view_full_update_; + uint64_t draw_sampler_full_update_; + + // Latest descriptor handles used for handling Xenos draw calls. 
+ D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_common_constants_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_vertex_float_constants_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_pixel_float_constants_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 8bd79ed84..386e33a32 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -11,10 +11,12 @@ #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/hlsl_shader_translator.h" @@ -22,10 +24,21 @@ namespace xe { namespace gpu { namespace d3d12 { -PipelineCache::PipelineCache(RegisterFile* register_file, +PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file, ui::d3d12::D3D12Context* context) - : register_file_(register_file), context_(context) { + : command_processor_(command_processor), + register_file_(register_file), + context_(context) { shader_translator_.reset(new HlslShaderTranslator()); + + // Set pipeline state description values we never change. + // Zero out tessellation, stream output, blend state and formats for render + // targets 4+, node mask, cached PSO, flags and other things. + std::memset(&update_desc_, 0, sizeof(update_desc_)); + update_desc_.BlendState.IndependentBlendEnable = TRUE; + update_desc_.SampleMask = UINT_MAX; + update_desc_.SampleDesc.Count = 1; } PipelineCache::~PipelineCache() { Shutdown(); } @@ -114,12 +127,6 @@ void PipelineCache::ClearCache() { pipelines_.clear(); COUNT_profile_set("gpu/pipeline_cache/pipelines", 0); - // Destroy all root signatures. 
- for (auto it : root_signatures_) { - it.second->Release(); - } - root_signatures_.clear(); - // Destroy all shaders. for (auto it : shader_map_) { delete it.second; @@ -259,6 +266,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( return UpdateStatus::kError; } + update_desc_.pRootSignature = + command_processor_->GetRootSignature(vertex_shader, pixel_shader); + if (update_desc_.pRootSignature == nullptr) { + return UpdateStatus::kError; + } update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC(); update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize(); if (pixel_shader != nullptr) { @@ -268,17 +280,9 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( update_desc_.PS.pShaderBytecode = nullptr; update_desc_.PS.BytecodeLength = 0; } - update_desc_.DS.pShaderBytecode = nullptr; - update_desc_.DS.BytecodeLength = 0; - update_desc_.HS.pShaderBytecode = nullptr; - update_desc_.HS.BytecodeLength = 0; // TODO(Triang3l): Geometry shaders. update_desc_.GS.pShaderBytecode = nullptr; update_desc_.GS.BytecodeLength = 0; - update_desc_.pRootSignature = GetRootSignature(vertex_shader, pixel_shader); - if (update_desc_.pRootSignature == nullptr) { - return UpdateStatus::kError; - } update_desc_.PrimitiveTopologyType = primitive_topology_is_line ? 
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE : D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; @@ -329,8 +333,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState( return UpdateStatus::kCompatible; } - update_desc_.BlendState.AlphaToCoverageEnable = FALSE; - update_desc_.BlendState.IndependentBlendEnable = TRUE; static const D3D12_BLEND kBlendFactorMap[] = { /* 0 */ D3D12_BLEND_ZERO, /* 1 */ D3D12_BLEND_ONE, @@ -384,11 +386,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState( blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO; blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD; } - blend_desc.LogicOpEnable = FALSE; - blend_desc.LogicOp = D3D12_LOGIC_OP_NOOP; blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF; } - update_desc_.SampleMask = UINT_MAX; return UpdateStatus::kMismatch; } @@ -516,11 +515,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( poly_offset_scale * (1.0f / 16.0f); update_desc_.RasterizerState.DepthClipEnable = !depth_clamp_enable ? TRUE : FALSE; - update_desc_.RasterizerState.MultisampleEnable = FALSE; - update_desc_.RasterizerState.AntialiasedLineEnable = FALSE; - update_desc_.RasterizerState.ForcedSampleCount = 0; - update_desc_.RasterizerState.ConservativeRaster = - D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; return UpdateStatus::kMismatch; } @@ -629,21 +623,7 @@ PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) { return it->second; } - // Set the unused fields of the pipeline description. - update_desc_.StreamOutput.pSODeclaration = nullptr; - update_desc_.StreamOutput.NumEntries = 0; - update_desc_.StreamOutput.pBufferStrides = nullptr; - update_desc_.StreamOutput.NumStrides = 0; - update_desc_.StreamOutput.RasterizedStream = 0; - update_desc_.InputLayout.pInputElementDescs = nullptr; - update_desc_.InputLayout.NumElements = 0; - update_desc_.SampleDesc.Count = 1; - update_desc_.SampleDesc.Quality = 0; - update_desc_.NodeMask = 0; - // TODO(Triang3l): Cache create pipelines. 
- update_desc_.CachedPSO.pCachedBlob = nullptr; - update_desc_.CachedPSO.CachedBlobSizeInBytes = 0; - update_desc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + // TODO(Triang3l): Cache created pipelines using CachedPSO. auto device = context_->GetD3D12Provider()->GetDevice(); ID3D12PipelineState* state; @@ -662,217 +642,6 @@ PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) { return pipeline; } -ID3D12RootSignature* PipelineCache::GetRootSignature( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { - uint32_t pixel_textures = - pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0; - uint32_t pixel_samplers = - pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0; - uint32_t vertex_textures = vertex_shader->GetTextureSRVCount(); - uint32_t vertex_samplers = vertex_shader->GetSamplerCount(); - // Max 96 textures (if all kinds of tfetch instructions are used for all fetch - // registers) and 32 samplers (one sampler per used fetch), but different - // shader stages have different texture sets. - uint32_t index = pixel_textures | (pixel_samplers << 7) | - (vertex_textures << 12) | (vertex_samplers << 19); - - // Try an existing root signature. - auto it = root_signatures_.find(index); - if (it != root_signatures_.end()) { - return it->second; - } - - // Create a new one. - D3D12_ROOT_SIGNATURE_DESC desc; - D3D12_ROOT_PARAMETER parameters[RootParameter::kCountWithTwoStageTextures]; - D3D12_DESCRIPTOR_RANGE ranges[RootParameter::kCountWithTwoStageTextures]; - desc.NumParameters = UINT(RootParameter::kCountNoTextures); - desc.pParameters = parameters; - desc.NumStaticSamplers = 0; - desc.pStaticSamplers = nullptr; - desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - - // Vertex constants - float and fetch. 
- { - auto& parameter = parameters[size_t(RootParameter::kVertexConstants)]; - auto& range = ranges[size_t(RootParameter::kVertexConstants)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 9; - range.BaseShaderRegister = 2; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - - // Pixel constants - float. - { - auto& parameter = parameters[size_t(RootParameter::kPixelConstants)]; - auto& range = ranges[size_t(RootParameter::kPixelConstants)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 8; - range.BaseShaderRegister = 2; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - - // Common constants - system and loop/bool. - { - auto& parameter = parameters[size_t(RootParameter::kCommonConstants)]; - auto& range = ranges[size_t(RootParameter::kCommonConstants)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 2; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - - // Virtual shared memory. 
- { - auto& parameter = parameters[size_t(RootParameter::kVirtualMemory)]; - auto& range = ranges[size_t(RootParameter::kVirtualMemory)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = 1; - range.BaseShaderRegister = 0; - range.RegisterSpace = 1; - range.OffsetInDescriptorsFromTableStart = 0; - } - - if (pixel_textures > 0 || vertex_textures > 0) { - desc.NumParameters = UINT(RootParameter::kCountWithOneStageTextures); - - // Pixel or vertex textures. - { - auto& parameter = - parameters[size_t(RootParameter::kPixelOrVertexTextures)]; - auto& range = ranges[size_t(RootParameter::kPixelOrVertexTextures)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - if (pixel_textures > 0) { - assert_true(pixel_samplers > 0); - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.NumDescriptors = pixel_textures; - } else { - assert_true(vertex_samplers > 0); - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.NumDescriptors = vertex_textures; - } - } - - // Pixel or vertex samplers. 
- { - auto& parameter = - parameters[size_t(RootParameter::kPixelOrVertexSamplers)]; - auto& range = ranges[size_t(RootParameter::kPixelOrVertexSamplers)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - if (pixel_samplers > 0) { - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.NumDescriptors = pixel_samplers; - } else { - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.NumDescriptors = vertex_samplers; - } - } - - if (pixel_textures > 0 && vertex_textures > 0) { - assert_true(vertex_samplers > 0); - - desc.NumParameters = UINT(RootParameter::kCountWithTwoStageTextures); - - // Vertex textures. - { - auto& parameter = parameters[size_t(RootParameter::kVertexTextures)]; - auto& range = ranges[size_t(RootParameter::kVertexTextures)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = vertex_textures; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - - // Vertex samplers. 
- { - auto& parameter = parameters[size_t(RootParameter::kVertexSamplers)]; - auto& range = ranges[size_t(RootParameter::kVertexSamplers)]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.NumDescriptors = vertex_samplers; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - } - } - - ID3DBlob* blob; - ID3DBlob* error_blob = nullptr; - if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, - &blob, &error_blob))) { - XELOGE( - "Failed to serialize a root signature with %u pixel textures, %u " - "pixel samplers, %u vertex textures and %u vertex samplers", - pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); - if (error_blob != nullptr) { - XELOGE("%s", - reinterpret_cast(error_blob->GetBufferPointer())); - error_blob->Release(); - } - return nullptr; - } - if (error_blob != nullptr) { - error_blob->Release(); - } - - auto device = context_->GetD3D12Provider()->GetDevice(); - ID3D12RootSignature* root_signature; - if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(), - blob->GetBufferSize(), - IID_PPV_ARGS(&root_signature)))) { - XELOGE( - "Failed to create a root signature with %u pixel textures, %u pixel " - "samplers, %u vertex textures and %u vertex samplers", - pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); - blob->Release(); - return nullptr; - } - blob->Release(); - - root_signatures_.insert({index, root_signature}); - return root_signature; -} - } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index fedb4865a..56c158a73 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ 
b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -24,6 +24,8 @@ namespace xe { namespace gpu { namespace d3d12 { +class D3D12CommandProcessor; + class PipelineCache { public: enum class UpdateStatus { @@ -32,7 +34,8 @@ class PipelineCache { kError, }; - PipelineCache(RegisterFile* register_file, ui::d3d12::D3D12Context* context); + PipelineCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file, ui::d3d12::D3D12Context* context); ~PipelineCache(); void Shutdown(); @@ -49,50 +52,6 @@ class PipelineCache { void ClearCache(); - enum class RootParameter { - // These are always present. - - // Most frequently changed (for one object drawn multiple times, for - // instance - may contain projection matrices, also vertex offsets for - // objects drawn in multiple parts). - // This constants 8 pages of float constants (b2-b9) and fetch constants - // (b10). - kVertexConstants, - // Less frequently changed (per-material) - 8 pages of float constants - // (b2-b9). - kPixelConstants, - // Rarely changed - system constants like viewport and alpha testing (b0) - // and loop and bool constants (b1). - kCommonConstants, - // Never changed - shared memory byte address buffer (t0, space1). - kVirtualMemory, - - kCountNoTextures, - - // These are there only if textures are fetched (they are changed pretty - // frequently, but for the ease of maintenance they're in the end). - // If the pixel shader samples textures, these are for pixel textures - // (changed more frequently), otherwise, if the vertex shader samples - // textures, these are for vertex textures. - - // Used textures of all types (t0+, space0). - kPixelOrVertexTextures = kCountNoTextures, - // Used samplers (s0+). - kPixelOrVertexSamplers, - - kCountWithOneStageTextures, - - // These are only present if both pixel and vertex shaders sample textures - // for vertex textures. - - // Used textures of all types (t0+, space0). - kVertexTextures = kCountWithOneStageTextures, - // Used samplers (s0+). 
- kVertexSamplers, - - kCountWithTwoStageTextures, - }; - private: bool SetShadowRegister(uint32_t* dest, uint32_t register_name); bool SetShadowRegister(float* dest, uint32_t register_name); @@ -104,11 +63,11 @@ class PipelineCache { PrimitiveType primitive_type, IndexFormat index_format); - // pRootSignature, VS, PS, DS, HS, GS, PrimitiveTopologyType. + // pRootSignature, VS, PS, GS, PrimitiveTopologyType. UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, PrimitiveType primitive_type); - // BlendState, SampleMask. + // BlendState. UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader); // RasterizerState. UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type); @@ -119,19 +78,15 @@ class PipelineCache { // NumRenderTargets, RTVFormats, DSVFormat. UpdateStatus UpdateRenderTargetFormats(); - RegisterFile* register_file_ = nullptr; - ui::d3d12::D3D12Context* context_ = nullptr; + D3D12CommandProcessor* command_processor_; + RegisterFile* register_file_; + ui::d3d12::D3D12Context* context_; // Reusable shader translator. std::unique_ptr shader_translator_ = nullptr; // All loaded shaders mapped by their guest hash key. std::unordered_map shader_map_; - // Root signatures for different descriptor counts. - std::unordered_map root_signatures_; - ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, - const D3D12Shader* pixel_shader); - // Hash state used to incrementally produce pipeline hashes during update. // By the time the full update pass has run the hash will represent the // current state in a way that can uniquely identify the produced @@ -139,12 +94,11 @@ class PipelineCache { XXH64_state_t hash_state_; struct Pipeline { ID3D12PipelineState* state; - // From root_signatures_ - not owned. + // Root signature taken from the command processor. ID3D12RootSignature* root_signature; }; // All previously generated pipelines mapped by hash. 
std::unordered_map pipelines_; - // Sets StreamOutput, InputLayout, SampleDesc, NodeMask, CachedPSO, Flags. Pipeline* GetPipeline(uint64_t hash_key); // Previously used pipeline. This matches our current state settings diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 1f8e72f99..016d7dc23 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -62,6 +62,7 @@ bool SharedMemory::Initialize() { Shutdown(); return false; } + buffer_gpu_address_ = buffer_->GetGPUVirtualAddress(); std::memset(heaps_, 0, sizeof(heaps_)); heap_creation_failed_ = false; @@ -406,6 +407,32 @@ void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) { TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list); } +void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { + D3D12_SHADER_RESOURCE_VIEW_DESC desc; + desc.Format = DXGI_FORMAT_R32_TYPELESS; + desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + desc.Buffer.FirstElement = 0; + desc.Buffer.NumElements = kBufferSize >> 2; + desc.Buffer.StructureByteStride = 0; + desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + context_->GetD3D12Provider()->GetDevice()->CreateShaderResourceView( + buffer_, &desc, handle); +} + +void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) { + D3D12_UNORDERED_ACCESS_VIEW_DESC desc; + desc.Format = DXGI_FORMAT_R32_TYPELESS; + desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + desc.Buffer.FirstElement = 0; + desc.Buffer.NumElements = kBufferSize >> 2; + desc.Buffer.StructureByteStride = 0; + desc.Buffer.CounterOffsetInBytes = 0; + desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + context_->GetD3D12Provider()->GetDevice()->CreateUnorderedAccessView( + buffer_, nullptr, &desc, handle); +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/shared_memory.h 
b/src/xenia/gpu/d3d12/shared_memory.h index b0b904e92..77efd4164 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -33,6 +33,10 @@ class SharedMemory { bool Initialize(); void Shutdown(); + D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddress() const { + return buffer_gpu_address_; + } + void BeginFrame(); // Returns true if anything has been written to command_list been done. // The draw command list is needed for the transition. @@ -51,6 +55,9 @@ class SharedMemory { // Makes the buffer usable for texture tiling after a resolve. void UseForWriting(ID3D12GraphicsCommandList* command_list); + void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle); + void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle); + private: Memory* memory_; @@ -61,6 +68,7 @@ class SharedMemory { static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; static constexpr uint32_t kAddressMask = kBufferSize - 1; ID3D12Resource* buffer_ = nullptr; + D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0; D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; // D3D resource tiles are 64 KB in size. diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 8451fdbba..8e8ad632f 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -198,7 +198,7 @@ std::vector HlslShaderTranslator::CompleteTranslation() { " uint2 xe_vertex_fetch[96];\n" "};\n" "\n" - "ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n" + "ByteAddressBuffer xe_shared_memory : register(t0, space1);\n" "\n" "#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \\\n" "XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \\\n" @@ -806,7 +806,7 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( bool conditional_emitted = BeginPredicatedInstruction( instr.is_predicated, instr.predicate_condition); - // Load the element from the virtual memory as uints and swap. 
+ // Load the element from the shared memory as uints and swap. EmitLoadOperand(0, instr.operands[0]); const char* load_swizzle; const char* load_function_suffix; @@ -832,9 +832,8 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( load_function_suffix = ""; break; } - EmitSourceDepth( - "xe_vertex_element%s = XeByteSwap(xe_virtual_memory.Load%s(\n", - load_swizzle, load_function_suffix); + EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n", + load_swizzle, load_function_suffix); EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)", instr.operands[1].storage_index); if (instr.attributes.stride != 0) { diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index de938b055..703b3279f 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -68,8 +68,8 @@ void UploadBufferPool::ClearCache() { uint8_t* UploadBufferPool::RequestFull(uint32_t size, ID3D12Resource*& buffer_out, uint32_t& offset_out) { - assert_true(size != 0 && size <= page_size_); - if (size == 0 || size > page_size_) { + assert_true(size <= page_size_); + if (size > page_size_) { return nullptr; } if (page_size_ - current_size_ < size || current_mapping_ == nullptr) { @@ -89,10 +89,6 @@ uint8_t* UploadBufferPool::RequestPartial(uint32_t size, ID3D12Resource*& buffer_out, uint32_t& offset_out, uint32_t& size_out) { - assert_true(size != 0); - if (size == 0) { - return nullptr; - } if (current_size_ == page_size_ || current_mapping_ == nullptr) { // Start a new page if can't fit any bytes or don't have an open page. 
if (!BeginNextPage()) { @@ -216,7 +212,7 @@ void DescriptorHeapPool::BeginFrame() { void DescriptorHeapPool::EndFrame() { EndPage(); } void DescriptorHeapPool::ClearCache() { - assert(current_size_ == 0); + assert_true(current_size_ == 0); while (unsent_ != nullptr) { auto next = unsent_->next; unsent_->heap->Release(); @@ -232,36 +228,39 @@ void DescriptorHeapPool::ClearCache() { sent_last_ = nullptr; } -uint64_t DescriptorHeapPool::GetPageForRequest(uint32_t count) const { - uint64_t page = current_page_; - if (page_size_ - current_size_ < count) { - ++page; - } - return page; -} - -bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) { - assert_true(count != 0 && count <= page_size_); - if (count == 0 || count > page_size_) { - return false; +uint64_t DescriptorHeapPool::Request(uint64_t previous_full_update, + uint32_t count_for_partial_update, + uint32_t count_for_full_update, + uint32_t& index_out) { + assert_true(count_for_partial_update <= count_for_full_update); + assert_true(count_for_full_update <= page_size_); + if (count_for_partial_update > count_for_full_update || + count_for_full_update > page_size_) { + return 0; } if (page_creation_failed_) { - // Don't increment the page index every call if there was a failure as well. - return false; + // Don't touch the page index every call if there was a failure as well. + return 0; } - // Go to the next page if there's not enough free space on the current one. + // If the last full update happened on the current page, a partial update is + // possible. + uint32_t count = previous_full_update == current_page_ + ? count_for_partial_update + : count_for_full_update; + + // Go to the next page if there's not enough free space on the current one, + // or because the previous page may be outdated. In this case, a full update + // is necessary. 
if (page_size_ - current_size_ < count) { EndPage(); ++current_page_; + count = count_for_full_update; } // Create the page if needed (may be the first call for the page). if (unsent_ == nullptr) { - if (page_creation_failed_) { - return false; - } auto device = context_->GetD3D12Provider()->GetDevice(); D3D12_DESCRIPTOR_HEAP_DESC heap_desc; heap_desc.Type = type_; @@ -273,7 +272,7 @@ bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) { XELOGE("Failed to create a heap for %u shader-visible descriptors", page_size_); page_creation_failed_ = true; - return false; + return 0; } unsent_ = new DescriptorHeap; unsent_->heap = heap; @@ -289,7 +288,7 @@ bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) { } index_out = current_size_; current_size_ += count; - return true; + return current_page_; } void DescriptorHeapPool::EndPage() { diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h index 51d552e80..9954035af 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -71,13 +71,34 @@ class DescriptorHeapPool { void EndFrame(); void ClearCache(); - // To check if a rebind will be required, and thus may possibly need to write - // all the descriptors needed for a draw call rather than only the modified - // ones. The page number can never be 0 if a frame has started, and it's - // changed every frame, so it's safe to use 0 to indicate that the descriptors - // for some data have never been written. - uint64_t GetPageForRequest(uint32_t count) const; - bool Request(uint32_t count, uint32_t& index_out); + // Because all descriptors for a single draw call must be in the same heap, + // sometimes all descriptors, rather than only the modified portion of them, + // need to be written. + // + // This may happen if there's not enough free space even for a partial update + // in the current heap, or if the heap which contains the unchanged part of + // the descriptors is outdated. 
+ // + // If something uses this pool to do partial updates, it must let this + // function determine whether a partial update is possible. For this purpose, + // this function returns a full update number - and it must be called with its + // previous return value for the set of descriptors it's updating. + // + // If this function returns a value that is the same as previous_full_update, + // a partial update needs to be done - and space for count_for_partial_update + // is allocated. + // + // If it's different, all descriptors must be written again - and space for + // count_for_full_update is allocated. + // + // If 0 is returned, there was an error. + // + // This MUST be called even if there's nothing to write in a partial update + // (with count_for_partial_update being 0), because a full update may still be + // required. + uint64_t Request(uint64_t previous_full_update, + uint32_t count_for_partial_update, + uint32_t count_for_full_update, uint32_t& index_out); // The current heap, for binding and actually writing - may be called only // after a successful request because before a request, the heap may not exist @@ -89,7 +110,6 @@ class DescriptorHeapPool { D3D12_GPU_DESCRIPTOR_HANDLE GetLastRequestHeapGPUStart() const { return current_heap_gpu_start_; } - uint64_t GetLastRequestPageNumber() const { return current_page_; } private: D3D12Context* context_;