diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index f0be8c50e..3338d5d9b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( sampler_count_vertex); return nullptr; } - root_signatures_bindful_.insert({index, root_signature}); + root_signatures_bindful_.emplace(index, root_signature); return root_signature; } @@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions( current_sample_positions_ = sample_positions; } -void D3D12CommandProcessor::SetComputePipelineState( - ID3D12PipelineState* pipeline_state) { - if (current_external_pipeline_state_ != pipeline_state) { - deferred_command_list_.D3DSetPipelineState(pipeline_state); - current_external_pipeline_state_ = pipeline_state; - current_cached_pipeline_state_ = nullptr; +void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) { + if (current_external_pipeline_ != pipeline) { + deferred_command_list_.D3DSetPipelineState(pipeline); + current_external_pipeline_ = pipeline; + current_cached_pipeline_ = nullptr; } } @@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const { } // Currently scaling is only supported with ROV. if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) { - return "Direct3D 12 - 2x"; + return "Direct3D 12 - ROV 2x"; } + // Rasterizer-ordered views are a feature very rarely used as of 2020 and + // that faces adoption complications (outside of Direct3D - on Vulkan - at + // least), but crucial to Xenia - raise awareness of its usage. + // https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319 + // "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I + // wasn't aware that Xenia D3D12 backend was using Raster Order Views + // feature" - oscarbg in that issue. + return "Direct3D 12 - ROV"; } return "Direct3D 12"; } @@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() { *this, *register_file_, bindless_resources_used_, edram_rov_used_, texture_cache_->IsResolutionScale2X() ? 2 : 1); if (!pipeline_cache_->Initialize()) { - XELOGE("Failed to initialize the graphics pipeline state cache"); + XELOGE("Failed to initialize the graphics pipeline cache"); return false; } @@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() { // Shut down binding - bindless descriptors may be owned by subsystems like // the texture cache. - // Root signatured are used by pipeline states, thus freed after the pipeline - // states. + // Root signatures are used by pipelines, thus freed after the pipelines. ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); for (auto it : root_signatures_bindful_) { @@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, xenos::VertexShaderExportMode::kMultipass || (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back))) { - // All faces are culled - can't be expressed in the pipeline state. + // All faces are culled - can't be expressed in the pipeline. return true; } @@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, line_loop_closing_index = 0; } - // Update the textures - this may bind pipeline state objects. + // Update the textures - this may bind pipelines. uint32_t used_texture_mask = vertex_shader->GetUsedTextureMask() | (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); @@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, early_z = true; } - // Create the pipeline state object if needed and bind it. - void* pipeline_state_handle; + // Create the pipeline if needed and bind it. + void* pipeline_handle; ID3D12RootSignature* root_signature; if (!pipeline_cache_->ConfigurePipeline( vertex_shader, pixel_shader, primitive_type_converted, indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16, - early_z, pipeline_render_targets, &pipeline_state_handle, + early_z, pipeline_render_targets, &pipeline_handle, &root_signature)) { return false; } - if (current_cached_pipeline_state_ != pipeline_state_handle) { + if (current_cached_pipeline_ != pipeline_handle) { deferred_command_list_.SetPipelineStateHandle( - reinterpret_cast(pipeline_state_handle)); - current_cached_pipeline_state_ = pipeline_state_handle; - current_external_pipeline_state_ = nullptr; + reinterpret_cast(pipeline_handle)); + current_cached_pipeline_ = pipeline_handle; + current_external_pipeline_ = nullptr; } // Update viewport, scissor, blend factor and stencil reference. @@ -2005,14 +2011,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, } // Must not call anything that can change the descriptor heap from now on! - // Ensure vertex and index buffers are resident and draw. + // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture - // validity will be tracked. + // validity is tracked. uint64_t vertex_buffers_resident[2] = {}; - for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { uint32_t vfetch_index = vertex_binding.fetch_constant; if (vertex_buffers_resident[vfetch_index >> 6] & - (1ull << (vfetch_index & 63))) { + (uint64_t(1) << (vfetch_index & 63))) { continue; } const auto& vfetch_constant = regs.Get( @@ -2045,7 +2052,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } - vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); } // Gather memexport ranges and ensure the heaps for them are resident, and @@ -2517,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { submission_open_ = true; // Start a new deferred command list - will submit it to the real one in the - // end of the submission (when async pipeline state object creation requests - // are fulfilled). + // end of the submission (when async pipeline creation requests are + // fulfilled). deferred_command_list_.Reset(); // Reset cached state of the command list. @@ -2527,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { ff_blend_factor_update_needed_ = true; ff_stencil_ref_update_needed_ = true; current_sample_positions_ = xenos::MsaaSamples::k1X; - current_cached_pipeline_state_ = nullptr; - current_external_pipeline_state_ = nullptr; + current_cached_pipeline_ = nullptr; + current_external_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; if (bindless_resources_used_) { @@ -2724,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { } bool D3D12CommandProcessor::CanEndSubmissionImmediately() const { - return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates(); + return !submission_open_ || !pipeline_cache_->IsCreatingPipelines(); } void D3D12CommandProcessor::ClearCommandAllocatorCache() { @@ -2745,12 +2753,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { } void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; + // Window parameters. // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: @@ -2846,14 +2854,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { scissor.right = pa_sc_window_scissor_br.br_x; scissor.bottom = pa_sc_window_scissor_br.br_y; if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor.left = - std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.top = - std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); - scissor.right = - std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.bottom = - std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -2915,12 +2923,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t line_loop_closing_index, xenos::Endian index_endian, uint32_t used_texture_mask, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_point_minmax = regs.Get(); @@ -3103,14 +3110,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index; system_constants_.line_loop_closing_index = line_loop_closing_index; - // Vertex index offset. - dirty |= system_constants_.vertex_base_index != vgt_indx_offset; - system_constants_.vertex_base_index = vgt_indx_offset; - // Index or tessellation edge factor buffer endianness. dirty |= system_constants_.vertex_index_endian != index_endian; system_constants_.vertex_index_endian = index_endian; + // Vertex index offset. + dirty |= system_constants_.vertex_base_index != vgt_indx_offset; + system_constants_.vertex_base_index = vgt_indx_offset; + // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { @@ -3574,7 +3581,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_vertex.float_bitmap[i]; // If no float constants at all, we can reuse any buffer for them, so not // invalidating. - if (float_constant_map_vertex.float_count != 0) { + if (float_constant_count_vertex) { cbuffer_binding_float_vertex_.up_to_date = false; } } @@ -3589,7 +3596,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_pixel.float_bitmap[i]) { current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; - if (float_constant_map_pixel.float_count != 0) { + if (float_constant_count_pixel) { cbuffer_binding_float_pixel_.up_to_date = false; } } @@ -3889,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings( sampler_parameters, provider.OffsetSamplerDescriptor( sampler_bindless_heap_cpu_start_, sampler_index)); - texture_cache_bindless_sampler_map_.insert( - {sampler_parameters.value, sampler_index}); + texture_cache_bindless_sampler_map_.emplace( + sampler_parameters.value, sampler_index); } current_sampler_bindless_indices_vertex_[j] = sampler_index; } @@ -3921,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings( sampler_parameters, provider.OffsetSamplerDescriptor( sampler_bindless_heap_cpu_start_, sampler_index)); - texture_cache_bindless_sampler_map_.insert( - {sampler_parameters.value, sampler_index}); + texture_cache_bindless_sampler_map_.emplace( + sampler_parameters.value, sampler_index); } current_sampler_bindless_indices_pixel_[j] = sampler_index; } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 0b5a80e68..42c1e0092 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor { // render targets or copying to depth render targets. void SetSamplePositions(xenos::MsaaSamples sample_positions); - // Returns a pipeline state object with deferred creation by its handle. May - // return nullptr if failed to create the pipeline state object. - inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( - void* handle) const { - return pipeline_cache_->GetD3D12PipelineStateByHandle(handle); + // Returns a pipeline with deferred creation by its handle. May return nullptr + // if failed to create the pipeline. + inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + return pipeline_cache_->GetD3D12PipelineByHandle(handle); } - // Sets the current pipeline state to a compute one. This is for cache - // invalidation primarily. A submission must be open. - void SetComputePipelineState(ID3D12PipelineState* pipeline_state); + // Sets the current pipeline to a compute one. This is for cache invalidation + // primarily. A submission must be open. + void SetComputePipeline(ID3D12PipelineState* pipeline); - // For the pipeline state cache to call when binding layout UIDs may be - // reused. + // For the pipeline cache to call when binding layout UIDs may be reused. void NotifyShaderBindingsLayoutUIDsInvalidated(); // Returns the text to display in the GPU backend name in the window title. @@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor { bool EndSubmission(bool is_swap); // Checks if ending a submission right now would not cause potentially more // delay than it would reduce by making the GPU start working earlier - such - // as when there are unfinished graphics pipeline state creation requests that - // would need to be fulfilled before actually submitting the command list. + // as when there are unfinished graphics pipeline creation requests that would + // need to be fulfilled before actually submitting the command list. bool CanEndSubmissionImmediately() const; bool AwaitAllQueueOperationsCompletion() { CheckSubmissionFence(submission_current_); @@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor { // Current SSAA sample positions (to be updated by the render target cache). xenos::MsaaSamples current_sample_positions_; - // Currently bound pipeline state, either a graphics pipeline state object - // from the pipeline state cache (with potentially deferred creation - - // current_external_pipeline_state_ is nullptr in this case) or a non-Xenos - // graphics or compute pipeline state object (current_cached_pipeline_state_ - // is nullptr in this case). - void* current_cached_pipeline_state_; - ID3D12PipelineState* current_external_pipeline_state_; + // Currently bound pipeline, either a graphics pipeline from the pipeline + // cache (with potentially deferred creation - current_external_pipeline_ is + // nullptr in this case) or a non-Xenos graphics or compute pipeline + // (current_cached_pipeline_ is nullptr in this case). + void* current_cached_pipeline_; + ID3D12PipelineState* current_external_pipeline_; // Currently bound graphics root signature. ID3D12RootSignature* current_graphics_root_signature_; diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index e50bbbaac..d32f223ce 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, stretch_pipeline_desc.SampleDesc.Count = 1; if (FAILED(device->CreateGraphicsPipelineState( &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) { - XELOGE("Failed to create the front buffer stretch pipeline state"); + XELOGE("Failed to create the front buffer stretch pipeline"); stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_ = nullptr; stretch_root_signature_->Release(); @@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, if (FAILED(device->CreateGraphicsPipelineState( &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) { XELOGE( - "Failed to create the gamma-correcting front buffer stretch " - "pipeline state"); + "Failed to create the gamma-correcting front buffer stretch pipeline"); stretch_pipeline_->Release(); stretch_pipeline_ = nullptr; stretch_gamma_root_signature_->Release(); diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index 7eb4ac6e0..c24d6a00a 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -85,7 +85,7 @@ class D3D12Shader : public Shader { return sampler_bindings_.data(); } - // For owning subsystems like the pipeline state cache, accessors for unique + // For owning subsystems like the pipeline cache, accessors for unique // identifiers (used instead of hashes to make sure collisions can't happen) // of binding layouts used by the shader, for invalidation if a shader with an // incompatible layout was bound. diff --git a/src/xenia/gpu/d3d12/deferred_command_list.cc b/src/xenia/gpu/d3d12/deferred_command_list.cc index 2b013e8ad..eb8d8922e 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.cc +++ b/src/xenia/gpu/d3d12/deferred_command_list.cc @@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list, } } break; case Command::kSetPipelineStateHandle: { - current_pipeline_state = - command_processor_.GetD3D12PipelineStateByHandle( - *reinterpret_cast(stream)); + current_pipeline_state = command_processor_.GetD3D12PipelineByHandle( + *reinterpret_cast(stream)); if (current_pipeline_state) { command_list->SetPipelineState(current_pipeline_state); } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 3a9f609d3..b2db2654e 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -43,10 +43,10 @@ DEFINE_bool( "D3D12"); DEFINE_int32( d3d12_pipeline_creation_threads, -1, - "Number of threads used for graphics pipeline state object creation. -1 to " - "calculate automatically (75% of logical CPU cores), a positive number to " - "specify the number of threads explicitly (up to the number of logical CPU " - "cores), 0 to disable multithreaded pipeline state object creation.", + "Number of threads used for graphics pipeline creation. -1 to calculate " + "automatically (75% of logical CPU cores), a positive number to specify " + "the number of threads explicitly (up to the number of logical CPU cores), " + "0 to disable multithreaded pipeline creation.", "D3D12"); DEFINE_bool(d3d12_tessellation_wireframe, false, "Display tessellated surfaces as wireframe for debugging.", @@ -125,8 +125,8 @@ bool PipelineCache::Initialize() { logical_processor_count = 6; } // Initialize creation thread synchronization data even if not using creation - // threads because they may be used anyway to create pipeline state objects - // from the storage. + // threads because they may be used anyway to create pipelines from the + // storage. creation_threads_busy_ = 0; creation_completion_event_ = xe::threading::Event::CreateManualResetEvent(true); @@ -145,7 +145,7 @@ bool PipelineCache::Initialize() { for (size_t i = 0; i < creation_thread_count; ++i) { std::unique_ptr creation_thread = xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); }); - creation_thread->set_name("D3D12 Pipeline States"); + creation_thread->set_name("D3D12 Pipelines"); creation_threads_.push_back(std::move(creation_thread)); } } @@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) { } ShutdownShaderStorage(); - // Remove references to the current pipeline state object. - current_pipeline_state_ = nullptr; + // Remove references to the current pipeline. + current_pipeline_ = nullptr; if (!creation_threads_.empty()) { - // Empty the pipeline state object creation queue and make sure there are no - // threads currently creating pipeline state objects because pipeline states - // are going to be deleted. + // Empty the pipeline creation queue and make sure there are no threads + // currently creating pipelines because pipelines are going to be deleted. bool await_creation_completion_event = false; { std::lock_guard lock(creation_request_lock_); @@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) { } } - // Destroy all pipeline state objects. - for (auto it : pipeline_states_) { + // Destroy all pipelines. + for (auto it : pipelines_) { it.second->state->Release(); delete it.second; } - pipeline_states_.clear(); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0); + pipelines_.clear(); + COUNT_profile_set("gpu/pipeline_cache/pipelines", 0); // Destroy all shaders. command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated(); @@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) { } texture_binding_layout_map_.clear(); texture_binding_layouts_.clear(); - for (auto it : shader_map_) { + for (auto it : shaders_) { delete it.second; } - shader_map_.clear(); + shaders_.clear(); if (reinitialize_shader_storage) { InitializeShaderStorage(shader_storage_root, shader_storage_title_id, @@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage( } size_t ucode_byte_count = shader_header.ucode_dword_count * sizeof(uint32_t); - if (shader_map_.find(shader_header.ucode_data_hash) != - shader_map_.end()) { + if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) { // Already added - usually shaders aren't added without the intention of // translating them imminently, so don't do additional checks to // actually ensure that translation happens right now (they would cause @@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage( D3D12Shader* shader = new D3D12Shader(shader_header.type, ucode_data_hash, ucode_dwords.data(), shader_header.ucode_dword_count); - shader_map_.insert({ucode_data_hash, shader}); + shaders_.emplace(ucode_data_hash, shader); // Create new threads if the currently existing threads can't keep up with // file reading, but not more than the number of logical processors minus // one. @@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage( } shader_translation_threads.clear(); for (D3D12Shader* shader : shaders_failed_to_translate) { - shader_map_.erase(shader->ucode_data_hash()); + shaders_.erase(shader->ucode_data_hash()); delete shader; } } @@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage( } // 'DXRO' or 'DXRT'. - const uint32_t pipeline_state_storage_magic_api = + const uint32_t pipeline_storage_magic_api = edram_rov_used_ ? 0x4F525844 : 0x54525844; - // Initialize the pipeline state storage stream. - uint64_t pipeline_state_storage_initialization_start_ = + // Initialize the pipeline storage stream. + uint64_t pipeline_storage_initialization_start_ = xe::Clock::QueryHostTickCount(); - auto pipeline_state_storage_file_path = + auto pipeline_storage_file_path = shader_storage_shareable_root / fmt::format("{:08X}.{}.d3d12.xpso", title_id, edram_rov_used_ ? "rov" : "rtv"); - pipeline_state_storage_file_ = - xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b"); - if (!pipeline_state_storage_file_) { + pipeline_storage_file_ = + xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b"); + if (!pipeline_storage_file_) { XELOGE( - "Failed to open the Direct3D 12 pipeline state description storage " - "file for writing, persistent shader storage will be disabled: {}", - xe::path_to_utf8(pipeline_state_storage_file_path)); + "Failed to open the Direct3D 12 pipeline description storage file for " + "writing, persistent shader storage will be disabled: {}", + xe::path_to_utf8(pipeline_storage_file_path)); fclose(shader_storage_file_); shader_storage_file_ = nullptr; return; } - pipeline_state_storage_file_flush_needed_ = false; + pipeline_storage_file_flush_needed_ = false; // 'XEPS'. - const uint32_t pipeline_state_storage_magic = 0x53504558; + const uint32_t pipeline_storage_magic = 0x53504558; struct { uint32_t magic; uint32_t magic_api; uint32_t version_swapped; - } pipeline_state_storage_file_header; - if (fread(&pipeline_state_storage_file_header, - sizeof(pipeline_state_storage_file_header), 1, - pipeline_state_storage_file_) && - pipeline_state_storage_file_header.magic == - pipeline_state_storage_magic && - pipeline_state_storage_file_header.magic_api == - pipeline_state_storage_magic_api && - xe::byte_swap(pipeline_state_storage_file_header.version_swapped) == + } pipeline_storage_file_header; + if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), + 1, pipeline_storage_file_) && + pipeline_storage_file_header.magic == pipeline_storage_magic && + pipeline_storage_file_header.magic_api == pipeline_storage_magic_api && + xe::byte_swap(pipeline_storage_file_header.version_swapped) == PipelineDescription::kVersion) { - uint64_t pipeline_state_storage_valid_bytes = - sizeof(pipeline_state_storage_file_header); - // Enqueue pipeline state descriptions written by previous Xenia executions - // until the end of the file or until a corrupted one is detected. - xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END); - int64_t pipeline_state_storage_told_end = - xe::filesystem::Tell(pipeline_state_storage_file_); - size_t pipeline_state_storage_told_count = - size_t(pipeline_state_storage_told_end >= - int64_t(pipeline_state_storage_valid_bytes) - ? (uint64_t(pipeline_state_storage_told_end) - - pipeline_state_storage_valid_bytes) / - sizeof(PipelineStoredDescription) - : 0); - if (pipeline_state_storage_told_count && - xe::filesystem::Seek(pipeline_state_storage_file_, - int64_t(pipeline_state_storage_valid_bytes), - SEEK_SET)) { + uint64_t pipeline_storage_valid_bytes = + sizeof(pipeline_storage_file_header); + // Enqueue pipeline descriptions written by previous Xenia executions until + // the end of the file or until a corrupted one is detected. + xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END); + int64_t pipeline_storage_told_end = + xe::filesystem::Tell(pipeline_storage_file_); + size_t pipeline_storage_told_count = size_t( + pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes) + ? (uint64_t(pipeline_storage_told_end) - + pipeline_storage_valid_bytes) / + sizeof(PipelineStoredDescription) + : 0); + if (pipeline_storage_told_count && + xe::filesystem::Seek(pipeline_storage_file_, + int64_t(pipeline_storage_valid_bytes), SEEK_SET)) { std::vector pipeline_stored_descriptions; - pipeline_stored_descriptions.resize(pipeline_state_storage_told_count); - pipeline_stored_descriptions.resize(fread( - pipeline_stored_descriptions.data(), - sizeof(PipelineStoredDescription), pipeline_state_storage_told_count, - pipeline_state_storage_file_)); + pipeline_stored_descriptions.resize(pipeline_storage_told_count); + pipeline_stored_descriptions.resize( + fread(pipeline_stored_descriptions.data(), + sizeof(PipelineStoredDescription), pipeline_storage_told_count, + pipeline_storage_file_)); if (!pipeline_stored_descriptions.empty()) { // Launch additional creation threads to use all cores to create - // pipeline state objects faster. Will also be using the main thread, so - // minus 1. + // pipelines faster. Will also be using the main thread, so minus 1. size_t creation_thread_original_count = creation_threads_.size(); size_t creation_thread_needed_count = std::max(std::min(pipeline_stored_descriptions.size(), @@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage( {}, [this, creation_thread_index]() { CreationThread(creation_thread_index); }); - creation_thread->set_name("D3D12 Pipeline States Additional"); + creation_thread->set_name("D3D12 Pipelines"); creation_threads_.push_back(std::move(creation_thread)); } - size_t pipeline_states_created = 0; + size_t pipelines_created = 0; for (const PipelineStoredDescription& pipeline_stored_description : pipeline_stored_descriptions) { const PipelineDescription& pipeline_description = @@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage( 0) != pipeline_stored_description.description_hash) { break; } - pipeline_state_storage_valid_bytes += - sizeof(PipelineStoredDescription); - // Skip already known pipeline states - those have already been - // enqueued. - auto found_range = pipeline_states_.equal_range( + pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription); + // Skip already known pipelines - those have already been enqueued. + auto found_range = pipelines_.equal_range( pipeline_stored_description.description_hash); - bool pipeline_state_found = false; + bool pipeline_found = false; for (auto it = found_range.first; it != found_range.second; ++it) { - PipelineState* found_pipeline_state = it->second; - if (!std::memcmp(&found_pipeline_state->description.description, + Pipeline* found_pipeline = it->second; + if (!std::memcmp(&found_pipeline->description.description, &pipeline_description, sizeof(pipeline_description))) { - pipeline_state_found = true; + pipeline_found = true; break; } } - if (pipeline_state_found) { + if (pipeline_found) { continue; } PipelineRuntimeDescription pipeline_runtime_description; auto vertex_shader_it = - shader_map_.find(pipeline_description.vertex_shader_hash); - if (vertex_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.vertex_shader_hash); + if (vertex_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.vertex_shader = vertex_shader_it->second; @@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage( } if (pipeline_description.pixel_shader_hash) { auto pixel_shader_it = - shader_map_.find(pipeline_description.pixel_shader_hash); - if (pixel_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.pixel_shader_hash); + if (pixel_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.pixel_shader = pixel_shader_it->second; @@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage( std::memcpy(&pipeline_runtime_description.description, &pipeline_description, sizeof(pipeline_description)); - PipelineState* new_pipeline_state = new PipelineState; - new_pipeline_state->state = nullptr; - std::memcpy(&new_pipeline_state->description, - &pipeline_runtime_description, + Pipeline* new_pipeline = new Pipeline; + new_pipeline->state = nullptr; + std::memcpy(&new_pipeline->description, &pipeline_runtime_description, sizeof(pipeline_runtime_description)); - pipeline_states_.insert( - std::make_pair(pipeline_stored_description.description_hash, - new_pipeline_state)); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", - pipeline_states_.size()); + pipelines_.emplace(pipeline_stored_description.description_hash, + new_pipeline); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); if (!creation_threads_.empty()) { // Submit the pipeline for creation to any available thread. { std::lock_guard lock(creation_request_lock_); - creation_queue_.push_back(new_pipeline_state); + creation_queue_.push_back(new_pipeline); } creation_request_cond_.notify_one(); } else { - new_pipeline_state->state = - CreateD3D12PipelineState(pipeline_runtime_description); + new_pipeline->state = + CreateD3D12Pipeline(pipeline_runtime_description); } - ++pipeline_states_created; + ++pipelines_created; } - CreateQueuedPipelineStatesOnProcessorThread(); + CreateQueuedPipelinesOnProcessorThread(); if (creation_threads_.size() > creation_thread_original_count) { { std::lock_guard lock(creation_request_lock_); creation_threads_shutdown_from_ = creation_thread_original_count; // Assuming the queue is empty because of - // CreateQueuedPipelineStatesOnProcessorThread. + // CreateQueuedPipelinesOnProcessorThread. } creation_request_cond_.notify_all(); while (creation_threads_.size() > creation_thread_original_count) { @@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage( } } XELOGGPU( - "Created {} graphics pipeline state objects from the storage in {} " - "milliseconds", - pipeline_states_created, + "Created {} graphics pipelines from the storage in {} milliseconds", + pipelines_created, (xe::Clock::QueryHostTickCount() - - pipeline_state_storage_initialization_start_) * + pipeline_storage_initialization_start_) * 1000 / xe::Clock::QueryHostTickFrequency()); } } - xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, - pipeline_state_storage_valid_bytes); + xe::filesystem::TruncateStdioFile(pipeline_storage_file_, + pipeline_storage_valid_bytes); } else { - xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0); - pipeline_state_storage_file_header.magic = pipeline_state_storage_magic; - pipeline_state_storage_file_header.magic_api = - pipeline_state_storage_magic_api; - pipeline_state_storage_file_header.version_swapped = + xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0); + pipeline_storage_file_header.magic = pipeline_storage_magic; + pipeline_storage_file_header.magic_api = pipeline_storage_magic_api; + pipeline_storage_file_header.version_swapped = xe::byte_swap(PipelineDescription::kVersion); - fwrite(&pipeline_state_storage_file_header, - sizeof(pipeline_state_storage_file_header), 1, - pipeline_state_storage_file_); + fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), + 1, pipeline_storage_file_); } shader_storage_root_ = storage_root; @@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage( // Start the storage writing thread. storage_write_flush_shaders_ = false; - storage_write_flush_pipeline_states_ = false; + storage_write_flush_pipelines_ = false; storage_write_thread_shutdown_ = false; storage_write_thread_ = xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); }); @@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() { storage_write_thread_.reset(); } storage_write_shader_queue_.clear(); - storage_write_pipeline_state_queue_.clear(); + storage_write_pipeline_queue_.clear(); - if (pipeline_state_storage_file_) { - fclose(pipeline_state_storage_file_); - pipeline_state_storage_file_ = nullptr; - pipeline_state_storage_file_flush_needed_ = false; + if (pipeline_storage_file_) { + fclose(pipeline_storage_file_); + pipeline_storage_file_ = nullptr; + pipeline_storage_file_flush_needed_ = false; } if (shader_storage_file_) { @@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() { void PipelineCache::EndSubmission() { if (shader_storage_file_flush_needed_ || - pipeline_state_storage_file_flush_needed_) { + pipeline_storage_file_flush_needed_) { { std::lock_guard lock(storage_write_request_lock_); if (shader_storage_file_flush_needed_) { storage_write_flush_shaders_ = true; } - if (pipeline_state_storage_file_flush_needed_) { - storage_write_flush_pipeline_states_ = true; + if (pipeline_storage_file_flush_needed_) { + storage_write_flush_pipelines_ = true; } } storage_write_request_cond_.notify_one(); shader_storage_file_flush_needed_ = false; - pipeline_state_storage_file_flush_needed_ = false; + pipeline_storage_file_flush_needed_ = false; } if (!creation_threads_.empty()) { - CreateQueuedPipelineStatesOnProcessorThread(); - // Await creation of all queued pipeline state objects. + CreateQueuedPipelinesOnProcessorThread(); + // Await creation of all queued pipelines. bool await_creation_completion_event; { std::lock_guard lock(creation_request_lock_); // Assuming the creation queue is already empty (because the processor - // thread also worked on creating the leftover pipeline state objects), so - // only check if there are threads with pipeline state objects currently - // being created. + // thread also worked on creating the leftover pipelines), so only check + // if there are threads with pipelines currently being created. await_creation_completion_event = creation_threads_busy_ != 0; if (await_creation_completion_event) { creation_completion_event_->Reset(); @@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() { } } -bool PipelineCache::IsCreatingPipelineStates() { +bool PipelineCache::IsCreatingPipelines() { if (creation_threads_.empty()) { return false; } @@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, uint32_t dword_count) { // Hash the input memory and lookup the shader. uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { // Shader has been previously loaded. return it->second; } @@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, // again. D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, dword_count); - shader_map_.insert({data_hash, shader}); + shaders_.emplace(data_hash, shader); return shader; } @@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() const { // If the values this functions returns are changed, INVALIDATE THE SHADER - // STORAGE (increase kVersion for BOTH shaders and pipeline states)! The - // exception is when the function originally returned "unsupported", but - // started to return a valid value (in this case the shader wouldn't be cached - // in the first place). Otherwise games will not be able to locate shaders for - // draws for which the host vertex shader type has changed! + // STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception + // is when the function originally returned "unsupported", but started to + // return a valid value (in this case the shader wouldn't be cached in the + // first place). Otherwise games will not be able to locate shaders for draws + // for which the host vertex shader type has changed! const auto& regs = register_file_; auto vgt_draw_initiator = regs.Get(); if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, @@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline( xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], - void** pipeline_state_handle_out, - ID3D12RootSignature** root_signature_out) { + void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES - assert_not_null(pipeline_state_handle_out); + assert_not_null(pipeline_handle_out); assert_not_null(root_signature_out); PipelineRuntimeDescription runtime_description; @@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline( } PipelineDescription& description = runtime_description.description; - if (current_pipeline_state_ != nullptr && - !std::memcmp(¤t_pipeline_state_->description.description, - &description, sizeof(description))) { - *pipeline_state_handle_out = current_pipeline_state_; + if (current_pipeline_ != nullptr && + !std::memcmp(¤t_pipeline_->description.description, &description, + sizeof(description))) { + *pipeline_handle_out = current_pipeline_; *root_signature_out = runtime_description.root_signature; return true; } - // Find an existing pipeline state object in the cache. + // Find an existing pipeline in the cache. uint64_t hash = XXH64(&description, sizeof(description), 0); - auto found_range = pipeline_states_.equal_range(hash); + auto found_range = pipelines_.equal_range(hash); for (auto it = found_range.first; it != found_range.second; ++it) { - PipelineState* found_pipeline_state = it->second; - if (!std::memcmp(&found_pipeline_state->description.description, - &description, sizeof(description))) { - current_pipeline_state_ = found_pipeline_state; - *pipeline_state_handle_out = found_pipeline_state; - *root_signature_out = found_pipeline_state->description.root_signature; + Pipeline* found_pipeline = it->second; + if (!std::memcmp(&found_pipeline->description.description, &description, + sizeof(description))) { + current_pipeline_ = found_pipeline; + *pipeline_handle_out = found_pipeline; + *root_signature_out = found_pipeline->description.root_signature; return true; } } @@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline( return false; } - PipelineState* new_pipeline_state = new PipelineState; - new_pipeline_state->state = nullptr; - std::memcpy(&new_pipeline_state->description, &runtime_description, + Pipeline* new_pipeline = new Pipeline; + new_pipeline->state = nullptr; + std::memcpy(&new_pipeline->description, &runtime_description, sizeof(runtime_description)); - pipeline_states_.insert(std::make_pair(hash, new_pipeline_state)); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", - pipeline_states_.size()); + pipelines_.emplace(hash, new_pipeline); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); if (!creation_threads_.empty()) { - // Submit the pipeline state object for creation to any available thread. + // Submit the pipeline for creation to any available thread. { std::lock_guard lock(creation_request_lock_); - creation_queue_.push_back(new_pipeline_state); + creation_queue_.push_back(new_pipeline); } creation_request_cond_.notify_one(); } else { - new_pipeline_state->state = CreateD3D12PipelineState(runtime_description); + new_pipeline->state = CreateD3D12Pipeline(runtime_description); } - if (pipeline_state_storage_file_) { + if (pipeline_storage_file_) { assert_not_null(storage_write_thread_); - pipeline_state_storage_file_flush_needed_ = true; + pipeline_storage_file_flush_needed_ = true; { std::lock_guard lock(storage_write_request_lock_); - storage_write_pipeline_state_queue_.emplace_back(); + storage_write_pipeline_queue_.emplace_back(); PipelineStoredDescription& stored_description = - storage_write_pipeline_state_queue_.back(); + storage_write_pipeline_queue_.back(); stored_description.description_hash = hash; std::memcpy(&stored_description.description, &description, sizeof(description)); @@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline( storage_write_request_cond_.notify_all(); } - current_pipeline_state_ = new_pipeline_state; - *pipeline_state_handle_out = new_pipeline_state; + current_pipeline_ = new_pipeline; + *pipeline_handle_out = new_pipeline; *root_signature_out = runtime_description.root_signature; return true; } @@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader( std::memcpy( texture_binding_layouts_.data() + new_uid.vector_span_offset, texture_bindings, texture_binding_layout_bytes); - texture_binding_layout_map_.insert( - {texture_binding_layout_hash, new_uid}); + texture_binding_layout_map_.emplace(texture_binding_layout_hash, + new_uid); } } if (bindless_sampler_count) { @@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader( vector_bindless_sampler_layout[i] = sampler_bindings[i].bindless_descriptor_index; } - bindless_sampler_layout_map_.insert( - {bindless_sampler_layout_hash, new_uid}); + bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash, + new_uid); } } } @@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription( /* 16 */ PipelineBlendFactor::kSrcAlphaSat, }; // Like kBlendFactorMap, but with color modes changed to alpha. Some - // pipeline state objects aren't created in Prey because a color mode is - // used for alpha. + // pipelines aren't created in Prey because a color mode is used for alpha. static const PipelineBlendFactor kBlendFactorAlphaMap[32] = { /* 0 */ PipelineBlendFactor::kZero, /* 1 */ PipelineBlendFactor::kOne, @@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription( return true; } -ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( +ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( const PipelineRuntimeDescription& runtime_description) { const PipelineDescription& description = runtime_description.description; if (runtime_description.pixel_shader != nullptr) { - XELOGGPU( - "Creating graphics pipeline state with VS {:016X}" - ", PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}", + runtime_description.vertex_shader->ucode_data_hash(), + runtime_description.pixel_shader->ucode_data_hash()); } else { - XELOGGPU("Creating graphics pipeline state with VS {:016X}", + XELOGGPU("Creating graphics pipeline with VS {:016X}", runtime_description.vertex_shader->ucode_data_hash()); } @@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( } } - // Create the pipeline state object. + // Create the D3D12 pipeline state object. auto device = command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); ID3D12PipelineState* state; if (FAILED(device->CreateGraphicsPipelineState(&state_desc, IID_PPV_ARGS(&state)))) { if (runtime_description.pixel_shader != nullptr) { - XELOGE( - "Failed to create graphics pipeline state with VS {:016X}" - ", PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}", + runtime_description.vertex_shader->ucode_data_hash(), + runtime_description.pixel_shader->ucode_data_hash()); } else { - XELOGE("Failed to create graphics pipeline state with VS {:016X}", + XELOGE("Failed to create graphics pipeline with VS {:016X}", runtime_description.vertex_shader->ucode_data_hash()); } return nullptr; @@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() { ucode_guest_endian.reserve(0xFFFF); bool flush_shaders = false; - bool flush_pipeline_states = false; + bool flush_pipelines = false; while (true) { if (flush_shaders) { @@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() { assert_not_null(shader_storage_file_); fflush(shader_storage_file_); } - if (flush_pipeline_states) { - flush_pipeline_states = false; - assert_not_null(pipeline_state_storage_file_); - fflush(pipeline_state_storage_file_); + if (flush_pipelines) { + flush_pipelines = false; + assert_not_null(pipeline_storage_file_); + fflush(pipeline_storage_file_); } std::pair shader_pair = {}; PipelineStoredDescription pipeline_description; - bool write_pipeline_state = false; + bool write_pipeline = false; { std::unique_lock lock(storage_write_request_lock_); if (storage_write_thread_shutdown_) { @@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() { storage_write_flush_shaders_ = false; flush_shaders = true; } - if (!storage_write_pipeline_state_queue_.empty()) { + if (!storage_write_pipeline_queue_.empty()) { std::memcpy(&pipeline_description, - &storage_write_pipeline_state_queue_.front(), + &storage_write_pipeline_queue_.front(), sizeof(pipeline_description)); - storage_write_pipeline_state_queue_.pop_front(); - write_pipeline_state = true; - } else if (storage_write_flush_pipeline_states_) { - storage_write_flush_pipeline_states_ = false; - flush_pipeline_states = true; + storage_write_pipeline_queue_.pop_front(); + write_pipeline = true; + } else if (storage_write_flush_pipelines_) { + storage_write_flush_pipelines_ = false; + flush_pipelines = true; } - if (!shader_pair.first && !write_pipeline_state) { + if (!shader_pair.first && !write_pipeline) { storage_write_request_cond_.wait(lock); continue; } @@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() { } } - if (write_pipeline_state) { - assert_not_null(pipeline_state_storage_file_); + if (write_pipeline) { + assert_not_null(pipeline_storage_file_); fwrite(&pipeline_description, sizeof(pipeline_description), 1, - pipeline_state_storage_file_); + pipeline_storage_file_); } } } void PipelineCache::CreationThread(size_t thread_index) { while (true) { - PipelineState* pipeline_state_to_create = nullptr; + Pipeline* pipeline_to_create = nullptr; // Check if need to shut down or set the completion event and dequeue the - // pipeline state if there is any. + // pipeline if there is any. { std::unique_lock lock(creation_request_lock_); if (thread_index >= creation_threads_shutdown_from_ || creation_queue_.empty()) { if (creation_completion_set_event_ && creation_threads_busy_ == 0) { - // Last pipeline state object in the queue created - signal the event - // if requested. + // Last pipeline in the queue created - signal the event if requested. creation_completion_set_event_ = false; creation_completion_event_->Set(); } @@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) { creation_request_cond_.wait(lock); continue; } - // Take the pipeline state from the queue and increment the busy thread - // count until the pipeline state object is created - other threads must - // be able to dequeue requests, but can't set the completion event until - // the pipeline state objects are fully created (rather than just started - // creating). - pipeline_state_to_create = creation_queue_.front(); + // Take the pipeline from the queue and increment the busy thread count + // until the pipeline is created - other threads must be able to dequeue + // requests, but can't set the completion event until the pipelines are + // fully created (rather than just started creating). + pipeline_to_create = creation_queue_.front(); creation_queue_.pop_front(); ++creation_threads_busy_; } // Create the D3D12 pipeline state object. - pipeline_state_to_create->state = - CreateD3D12PipelineState(pipeline_state_to_create->description); + pipeline_to_create->state = + CreateD3D12Pipeline(pipeline_to_create->description); - // Pipeline state object created - the thread is not busy anymore, safe to - // set the completion event if needed (at the next iteration, or in some - // other thread). + // Pipeline created - the thread is not busy anymore, safe to set the + // completion event if needed (at the next iteration, or in some other + // thread). { std::lock_guard lock(creation_request_lock_); --creation_threads_busy_; @@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) { } } -void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() { +void PipelineCache::CreateQueuedPipelinesOnProcessorThread() { assert_false(creation_threads_.empty()); while (true) { - PipelineState* pipeline_state_to_create; + Pipeline* pipeline_to_create; { std::lock_guard lock(creation_request_lock_); if (creation_queue_.empty()) { break; } - pipeline_state_to_create = creation_queue_.front(); + pipeline_to_create = creation_queue_.front(); creation_queue_.pop_front(); } - pipeline_state_to_create->state = - CreateD3D12PipelineState(pipeline_state_to_create->description); + pipeline_to_create->state = + CreateD3D12Pipeline(pipeline_to_create->description); } } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index cdc6ed5f3..ee7f0a7de 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -29,6 +29,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" namespace xe { namespace gpu { @@ -54,7 +55,7 @@ class PipelineCache { void ShutdownShaderStorage(); void EndSubmission(); - bool IsCreatingPipelineStates(); + bool IsCreatingPipelines(); D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count); @@ -73,14 +74,12 @@ class PipelineCache { xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], - void** pipeline_state_handle_out, - ID3D12RootSignature** root_signature_out); + void** pipeline_handle_out, ID3D12RootSignature** root_signature_out); - // Returns a pipeline state object with deferred creation by its handle. May - // return nullptr if failed to create the pipeline state object. - inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( - void* handle) const { - return reinterpret_cast(handle)->state; + // Returns a pipeline with deferred creation by its handle. May return nullptr + // if failed to create the pipeline. + inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + return reinterpret_cast(handle)->state; } private: @@ -237,7 +236,7 @@ class PipelineCache { const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineRuntimeDescription& runtime_description_out); - ID3D12PipelineState* CreateD3D12PipelineState( + ID3D12PipelineState* CreateD3D12Pipeline( const PipelineRuntimeDescription& runtime_description); D3D12CommandProcessor& command_processor_; @@ -255,9 +254,9 @@ class PipelineCache { IDxcUtils* dxc_utils_ = nullptr; IDxcCompiler* dxc_compiler_ = nullptr; - // All loaded shaders mapped by their guest hash key. + // Ucode hash -> shader. std::unordered_map> - shader_map_; + shaders_; struct LayoutUID { size_t uid; @@ -285,21 +284,20 @@ class PipelineCache { // Xenos pixel shader provided. std::vector depth_only_pixel_shader_; - struct PipelineState { + struct Pipeline { // nullptr if creation has failed. ID3D12PipelineState* state; PipelineRuntimeDescription description; }; - // All previously generated pipeline state objects identified by hash and the - // description. - std::unordered_multimap> - pipeline_states_; + pipelines_; - // Previously used pipeline state object. This matches our current state - // settings and allows us to quickly(ish) reuse the pipeline state if no - // registers have changed. - PipelineState* current_pipeline_state_ = nullptr; + // Previously used pipeline. This matches our current state settings and + // allows us to quickly(ish) reuse the pipeline if no registers have been + // changed. + Pipeline* current_pipeline_ = nullptr; // Currently open shader storage path. std::filesystem::path shader_storage_root_; @@ -309,10 +307,9 @@ class PipelineCache { FILE* shader_storage_file_ = nullptr; bool shader_storage_file_flush_needed_ = false; - // Pipeline state storage output stream, for preload in the next emulator - // runs. - FILE* pipeline_state_storage_file_ = nullptr; - bool pipeline_state_storage_file_flush_needed_ = false; + // Pipeline storage output stream, for preload in the next emulator runs. + FILE* pipeline_storage_file_ = nullptr; + bool pipeline_storage_file_flush_needed_ = false; // Thread for asynchronous writing to the storage streams. void StorageWriteThread(); @@ -322,28 +319,27 @@ class PipelineCache { // thread is notified about its change via storage_write_request_cond_. std::deque> storage_write_shader_queue_; - std::deque storage_write_pipeline_state_queue_; + std::deque storage_write_pipeline_queue_; bool storage_write_flush_shaders_ = false; - bool storage_write_flush_pipeline_states_ = false; + bool storage_write_flush_pipelines_ = false; bool storage_write_thread_shutdown_ = false; std::unique_ptr storage_write_thread_; - // Pipeline state object creation threads. + // Pipeline creation threads. void CreationThread(size_t thread_index); - void CreateQueuedPipelineStatesOnProcessorThread(); + void CreateQueuedPipelinesOnProcessorThread(); std::mutex creation_request_lock_; std::condition_variable creation_request_cond_; // Protected with creation_request_lock_, notify_one creation_request_cond_ // when set. - std::deque creation_queue_; - // Number of threads that are currently creating a pipeline state object - - // incremented when a pipeline state object is dequeued (the completion event - // can't be triggered before this is zero). Protected with - // creation_request_lock_. + std::deque creation_queue_; + // Number of threads that are currently creating a pipeline - incremented when + // a pipeline is dequeued (the completion event can't be triggered before this + // is zero). Protected with creation_request_lock_. size_t creation_threads_busy_ = 0; - // Manual-reset event set when the last queued pipeline state object is - // created and there are no more pipeline state objects to create. This is - // triggered by the thread creating the last pipeline state object. + // Manual-reset event set when the last queued pipeline is created and there + // are no more pipelines to create. This is triggered by the thread creating + // the last pipeline. std::unique_ptr creation_completion_event_; // Whether setting the event on completion is queued. Protected with // creation_request_lock_, notify_one creation_request_cond_ when set. diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index d4f989123..90ba11ac5 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // again and again and exit. if (!conversion_needed || converted_index_count == 0) { converted_indices.gpu_address = 0; - converted_indices_cache_.insert( - std::make_pair(converted_indices.key.value, converted_indices)); + converted_indices_cache_.emplace(converted_indices.key.value, + converted_indices); memory_regions_used_ |= memory_regions_used_bits; return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty : ConversionResult::kConversionNotNeeded; @@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // Cache and return the indices. converted_indices.gpu_address = gpu_address; - converted_indices_cache_.insert( - std::make_pair(converted_indices.key.value, converted_indices)); + converted_indices_cache_.emplace(converted_indices.key.value, + converted_indices); memory_regions_used_ |= memory_regions_used_bits; gpu_address_out = gpu_address; index_count_out = converted_index_count; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index b2c964a55..66ef2ba9f 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { return false; } - // Create the EDRAM load/store pipeline state objects. + // Create the EDRAM load/store pipelines. for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i]; - edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( + edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.load_shader, mode_info.load_shader_size, edram_load_store_root_signature_); - edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( + edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.store_shader, mode_info.store_shader_size, edram_load_store_root_signature_); if (edram_load_pipelines_[i] == nullptr || edram_store_pipelines_[i] == nullptr) { - XELOGE( - "Failed to create the EDRAM load/store pipeline states for mode {}", - i); + XELOGE("Failed to create the EDRAM load/store pipelines for mode {}", + i); Shutdown(); return false; } @@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { } } - // Create the resolve root signatures and pipeline state objects. + // Create the resolve root signatures and pipelines. D3D12_ROOT_PARAMETER resolve_root_parameters[3]; // Copying root signature. @@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { return false; } - // Copying pipeline state objects. + // Copying pipelines. uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1; for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount); ++i) { @@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { continue; } const auto& resolve_copy_shader = resolve_copy_shaders_[i]; - ID3D12PipelineState* resolve_copy_pipeline_state = - ui::d3d12::util::CreateComputePipelineState( + ID3D12PipelineState* resolve_copy_pipeline = + ui::d3d12::util::CreateComputePipeline( device, resolve_copy_shader.first, resolve_copy_shader.second, resolve_copy_root_signature_); - if (resolve_copy_pipeline_state == nullptr) { - XELOGE("Failed to create {} resolve copy pipeline state", + if (resolve_copy_pipeline == nullptr) { + XELOGE("Failed to create {} resolve copy pipeline", resolve_copy_shader_info.debug_name); } - resolve_copy_pipeline_state->SetName(reinterpret_cast( + resolve_copy_pipeline->SetName(reinterpret_cast( xe::to_utf16(resolve_copy_shader_info.debug_name).c_str())); - resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state; + resolve_copy_pipelines_[i] = resolve_copy_pipeline; } - // Clearing pipeline state objects. - resolve_clear_32bpp_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( - device, - resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs - : resolve_clear_32bpp_cs, - resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs) - : sizeof(resolve_clear_32bpp_cs), - resolve_clear_root_signature_); - if (resolve_clear_32bpp_pipeline_state_ == nullptr) { - XELOGE("Failed to create the 32bpp resolve clear pipeline state"); + // Clearing pipelines. + resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline( + device, + resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs + : resolve_clear_32bpp_cs, + resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs) + : sizeof(resolve_clear_32bpp_cs), + resolve_clear_root_signature_); + if (resolve_clear_32bpp_pipeline_ == nullptr) { + XELOGE("Failed to create the 32bpp resolve clear pipeline"); Shutdown(); return false; } - resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp"); - resolve_clear_64bpp_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( - device, - resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs - : resolve_clear_64bpp_cs, - resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs) - : sizeof(resolve_clear_64bpp_cs), - resolve_clear_root_signature_); - if (resolve_clear_64bpp_pipeline_state_ == nullptr) { - XELOGE("Failed to create the 64bpp resolve clear pipeline state"); + resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp"); + resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline( + device, + resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs + : resolve_clear_64bpp_cs, + resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs) + : sizeof(resolve_clear_64bpp_cs), + resolve_clear_root_signature_); + if (resolve_clear_64bpp_pipeline_ == nullptr) { + XELOGE("Failed to create the 64bpp resolve clear pipeline"); Shutdown(); return false; } - resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp"); + resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp"); if (!edram_rov_used_) { assert_false(resolution_scale_2x_); - resolve_clear_depth_24_32_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( + resolve_clear_depth_24_32_pipeline_ = + ui::d3d12::util::CreateComputePipeline( device, resolve_clear_depth_24_32_cs, sizeof(resolve_clear_depth_24_32_cs), resolve_clear_root_signature_); - if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) { + if (resolve_clear_depth_24_32_pipeline_ == nullptr) { XELOGE( "Failed to create the 24-bit and 32-bit depth resolve clear pipeline " "state"); Shutdown(); return false; } - resolve_clear_64bpp_pipeline_state_->SetName( + resolve_clear_64bpp_pipeline_->SetName( L"Resolve Clear 24-bit & 32-bit Depth"); } @@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() { edram_snapshot_restore_pool_.reset(); ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_); ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_); - for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) { - ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]); + for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) { + ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]); } ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_); for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { @@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory, 0, sizeof(copy_shader_constants) / sizeof(uint32_t), ©_shader_constants, 0); } - command_processor_.SetComputePipelineState( - resolve_copy_pipeline_states_[size_t(copy_shader)]); + command_processor_.SetComputePipeline( + resolve_copy_pipelines_[size_t(copy_shader)]); command_processor_.SubmitBarriers(); command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1); @@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory, command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(depth_clear_constants) / sizeof(uint32_t), &depth_clear_constants, 0); - command_processor_.SetComputePipelineState( - clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_ - : resolve_clear_32bpp_pipeline_state_); + command_processor_.SetComputePipeline( + clear_float32_depth ? resolve_clear_depth_24_32_pipeline_ + : resolve_clear_32bpp_pipeline_); command_processor_.SubmitBarriers(); command_list.D3DDispatch(clear_group_count.first, clear_group_count.second, 1); @@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory, 0, sizeof(color_clear_constants) / sizeof(uint32_t), &color_clear_constants, 0); } - command_processor_.SetComputePipelineState( + command_processor_.SetComputePipeline( resolve_info.color_edram_info.format_is_64bpp - ? resolve_clear_64bpp_pipeline_state_ - : resolve_clear_32bpp_pipeline_state_); + ? resolve_clear_64bpp_pipeline_ + : resolve_clear_32bpp_pipeline_); command_processor_.SubmitBarriers(); command_list.D3DDispatch(clear_group_count.first, clear_group_count.second, 1); @@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( render_target->footprints, nullptr, nullptr, ©_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size); - render_targets_.insert(std::make_pair(key.value, render_target)); + render_targets_.emplace(key.value, render_target); COUNT_profile_set("gpu/render_target_cache/render_targets", render_targets_.size()); #if 0 @@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() { 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, render_target->key.format); - command_processor_.SetComputePipelineState( - edram_store_pipelines_[size_t(mode)]); + command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]); // 1 group per 80x16 samples. command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); @@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram( 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, render_target->key.format); - command_processor_.SetComputePipelineState( - edram_load_pipelines_[size_t(mode)]); + command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]); // 1 group per 80x16 samples. command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1); diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 0def0d25c..bc68c68a9 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -237,14 +237,13 @@ class D3D12CommandProcessor; // get each of the 4 host pixels for each sample. class RenderTargetCache { public: - // Direct3D 12 debug layer does some kaschenit-style trolling by giving errors - // that contradict each other when you use null RTV descriptors - if you set - // a valid format in RTVFormats in the pipeline state, it says that null - // descriptors can only be used if the format in the pipeline state is - // DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains - // that the format in the pipeline doesn't match the RTV format. So we have to - // make render target bindings consecutive and remap the output indices in - // pixel shaders. + // Direct3D 12 debug layer is giving errors that contradict each other when + // you use null RTV descriptors - if you set a valid format in RTVFormats in + // the pipeline state, it says that null descriptors can only be used if the + // format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if + // DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline + // state doesn't match the RTV format. So we have to make render target + // bindings consecutive and remap the output indices in pixel shaders. struct PipelineRenderTarget { uint32_t guest_render_target; DXGI_FORMAT format; @@ -537,7 +536,7 @@ class RenderTargetCache { // 16: - EDRAM pitch in tiles. uint32_t base_samples_2x_depth_pitch; }; - // EDRAM pipeline states for the RTV/DSV path. + // EDRAM pipelines for the RTV/DSV path. static const EdramLoadStoreModeInfo edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)]; ID3D12PipelineState* @@ -546,20 +545,20 @@ class RenderTargetCache { ID3D12PipelineState* edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {}; - // Resolve root signatures and pipeline state objects. + // Resolve root signatures and pipelines. ID3D12RootSignature* resolve_copy_root_signature_ = nullptr; static const std::pair resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)]; - ID3D12PipelineState* resolve_copy_pipeline_states_[size_t( + ID3D12PipelineState* resolve_copy_pipelines_[size_t( draw_util::ResolveCopyShaderIndex::kCount)] = {}; ID3D12RootSignature* resolve_clear_root_signature_ = nullptr; // Clearing 32bpp color, depth with ROV, or unorm depth without ROV. - ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr; // Clearing 64bpp color. - ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr; // Clearing float depth without ROV, both the float24 and the host float32 // versions. - ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr; // FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on // Nvidia Maxwell 1st generation and older. diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index c8b1e6297..44d76c9ed 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) { return false; } - // Create the loading pipeline state objects. + // Create the loading pipelines. for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { const LoadModeInfo& mode_info = load_mode_info_[i]; - load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState( + load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.shader, mode_info.shader_size, load_root_signature_); - if (load_pipeline_states_[i] == nullptr) { - XELOGE( - "Failed to create the texture loading pipeline state object for mode " - "{}", - i); + if (load_pipelines_[i] == nullptr) { + XELOGE("Failed to create the texture loading pipeline for mode {}", i); Shutdown(); return false; } if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) { - load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState( + load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.shader_2x, mode_info.shader_2x_size, load_root_signature_); - if (load_pipeline_states_2x_[i] == nullptr) { + if (load_pipelines_2x_[i] == nullptr) { XELOGE( - "Failed to create the 2x-scaled texture loading pipeline state " - "for mode {}", + "Failed to create the 2x-scaled texture loading pipeline for mode " + "{}", i); Shutdown(); return false; @@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() { ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_); for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { - ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]); - ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]); + ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]); + ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]); } ui::d3d12::util::ReleaseAndNull(load_root_signature_); @@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { if (IsResolutionScale2X() && key.tiled) { LoadMode load_mode = GetLoadMode(key); if (load_mode != LoadMode::kUnknown && - load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) { + load_pipelines_2x_[uint32_t(load_mode)] != nullptr) { uint32_t base_size = 0, mip_size = 0; texture_util::GetTextureTotalSize( key.dimension, key.width, key.height, key.depth, key.format, @@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { } texture->base_watch_handle = nullptr; texture->mip_watch_handle = nullptr; - textures_.insert(std::make_pair(map_key, texture)); + textures_.emplace(map_key, texture); COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); textures_total_size_ += texture->resource_size; COUNT_profile_set("gpu/texture_cache/total_size_mb", @@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) { return false; } bool scaled_resolve = texture->key.scaled_resolve ? true : false; - ID3D12PipelineState* pipeline_state = - scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)] - : load_pipeline_states_[uint32_t(load_mode)]; - if (pipeline_state == nullptr) { + ID3D12PipelineState* pipeline = scaled_resolve + ? load_pipelines_2x_[uint32_t(load_mode)] + : load_pipelines_[uint32_t(load_mode)]; + if (pipeline == nullptr) { return false; } const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)]; @@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { load_mode_info.srv_bpe_log2); } } - command_processor_.SetComputePipelineState(pipeline_state); + command_processor_.SetComputePipeline(pipeline); command_list.D3DSetComputeRootSignature(load_root_signature_); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); @@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture, } device->CreateShaderResourceView( texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); - texture.srv_descriptors.insert({descriptor_key, descriptor_index}); + texture.srv_descriptors.emplace(descriptor_key, descriptor_index); return descriptor_index; } diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 1345d8faf..0e66328f0 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -550,9 +550,9 @@ class TextureCache { static const LoadModeInfo load_mode_info_[]; ID3D12RootSignature* load_root_signature_ = nullptr; - ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {}; - // Load pipeline state objects for 2x-scaled resolved targets. - ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {}; + ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {}; + // Load pipelines for 2x-scaled resolved targets. + ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {}; std::unordered_multimap textures_; uint64_t textures_total_size_ = 0; diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 92be28630..76eed4d10 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), DxbcSrc::LU(~uint32_t(3))); } - // Add the word offset from the instruction, plus the offset of the first - // needed word within the element. + // Add the word offset from the instruction (signed), plus the offset of the + // first needed word within the element. uint32_t first_word_index; xe::bit_scan_forward(needed_words, &first_word_index); int32_t first_word_buffer_offset = diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index d253bdad0..23998c307 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -65,17 +65,17 @@ enum class InstructionStorageTarget { // disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both // skipped components and zeros, which cannot be encoded, and therefore it will // not). -constexpr uint32_t GetInstructionStorageTargetUsedComponents( +constexpr uint32_t GetInstructionStorageTargetUsedComponentCount( InstructionStorageTarget target) { switch (target) { case InstructionStorageTarget::kNone: - return 0b0000; + return 0; case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: - return 0b0111; + return 3; case InstructionStorageTarget::kDepth: - return 0b0001; + return 1; default: - return 0b1111; + return 4; } } @@ -136,8 +136,9 @@ struct InstructionResult { // Returns the write mask containing only components actually present in the // target. uint32_t GetUsedWriteMask() const { - return original_write_mask & - GetInstructionStorageTargetUsedComponents(storage_target); + uint32_t target_component_count = + GetInstructionStorageTargetUsedComponentCount(storage_target); + return original_write_mask & ((1 << target_component_count) - 1); } // True if the components are in their 'standard' swizzle arrangement (xyzw). bool IsStandardSwizzle() const { @@ -161,6 +162,28 @@ struct InstructionResult { } return used_components; } + // Returns which components of the used write mask are constant, and what + // values they have. + uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const { + uint32_t constant_components = 0; + uint32_t constant_values = 0; + uint32_t used_write_mask = GetUsedWriteMask(); + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_write_mask & (1 << i))) { + continue; + } + SwizzleSource component = components[i]; + if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) { + continue; + } + constant_components |= 1 << i; + if (component == SwizzleSource::k1) { + constant_values |= 1 << i; + } + } + constant_values_out = constant_values; + return constant_components; + } }; enum class InstructionStorageSource { diff --git a/src/xenia/gpu/shared_memory.h b/src/xenia/gpu/shared_memory.h index 496836a38..98719b670 100644 --- a/src/xenia/gpu/shared_memory.h +++ b/src/xenia/gpu/shared_memory.h @@ -25,6 +25,9 @@ namespace gpu { // system page size granularity. class SharedMemory { public: + static constexpr uint32_t kBufferSizeLog2 = 29; + static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; + virtual ~SharedMemory(); // Call in the implementation-specific ClearCache. virtual void ClearCache(); @@ -98,9 +101,6 @@ class SharedMemory { // destructor. void ShutdownCommon(); - static constexpr uint32_t kBufferSizeLog2 = 29; - static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; - // Sparse allocations are 4 MB, so not too many of them are allocated, but // also not to waste too much memory for padding (with 16 MB there's too // much). diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index b9e23dc93..5c0a104e5 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() { return false; } - // Create the pipeline states. - D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {}; - pipeline_state_desc.pRootSignature = root_signature_; - pipeline_state_desc.VS.pShaderBytecode = immediate_vs; - pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs); - pipeline_state_desc.PS.pShaderBytecode = immediate_ps; - pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps); + // Create the pipelines. + D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {}; + pipeline_desc.pRootSignature = root_signature_; + pipeline_desc.VS.pShaderBytecode = immediate_vs; + pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs); + pipeline_desc.PS.pShaderBytecode = immediate_ps; + pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps); D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc = - pipeline_state_desc.BlendState.RenderTarget[0]; + pipeline_desc.BlendState.RenderTarget[0]; pipeline_blend_desc.BlendEnable = TRUE; pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; @@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | D3D12_COLOR_WRITE_ENABLE_GREEN | D3D12_COLOR_WRITE_ENABLE_BLUE; - pipeline_state_desc.SampleMask = UINT_MAX; - pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; - pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; - pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE; - pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE; + pipeline_desc.SampleMask = UINT_MAX; + pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE; + pipeline_desc.RasterizerState.DepthClipEnable = TRUE; D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {}; pipeline_input_elements[0].SemanticName = "POSITION"; pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT; @@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_input_elements[2].AlignedByteOffset = offsetof(ImmediateVertex, color); - pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements; - pipeline_state_desc.InputLayout.NumElements = + pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements; + pipeline_desc.InputLayout.NumElements = UINT(xe::countof(pipeline_input_elements)); - pipeline_state_desc.PrimitiveTopologyType = - D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - pipeline_state_desc.NumRenderTargets = 1; - pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; - pipeline_state_desc.SampleDesc.Count = 1; + pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pipeline_desc.NumRenderTargets = 1; + pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; + pipeline_desc.SampleDesc.Count = 1; if (FAILED(device->CreateGraphicsPipelineState( - &pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) { + &pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) { XELOGE( "Failed to create the Direct3D 12 immediate drawer triangle pipeline " "state"); Shutdown(); return false; } - pipeline_state_desc.PrimitiveTopologyType = - D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; if (FAILED(device->CreateGraphicsPipelineState( - &pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) { + &pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) { XELOGE( "Failed to create the Direct3D 12 immediate drawer line pipeline " "state"); @@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() { util::ReleaseAndNull(sampler_heap_); - util::ReleaseAndNull(pipeline_state_line_); - util::ReleaseAndNull(pipeline_state_triangle_); + util::ReleaseAndNull(pipeline_line_); + util::ReleaseAndNull(pipeline_triangle_); util::ReleaseAndNull(root_signature_); } @@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { uint32_t(sampler_index))); } - // Set the primitive type and the pipeline state for it. + // Set the primitive type and the pipeline for it. D3D_PRIMITIVE_TOPOLOGY primitive_topology; - ID3D12PipelineState* pipeline_state; + ID3D12PipelineState* pipeline; switch (draw.primitive_type) { case ImmediatePrimitiveType::kLines: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; - pipeline_state = pipeline_state_line_; + pipeline = pipeline_line_; break; case ImmediatePrimitiveType::kTriangles: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - pipeline_state = pipeline_state_triangle_; + pipeline = pipeline_triangle_; break; default: assert_unhandled_case(draw.primitive_type); @@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { if (current_primitive_topology_ != primitive_topology) { current_primitive_topology_ = primitive_topology; current_command_list_->IASetPrimitiveTopology(primitive_topology); - current_command_list_->SetPipelineState(pipeline_state); + current_command_list_->SetPipelineState(pipeline); } // Draw. diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h index 4300af76e..fbc362f59 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h @@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { kCount }; - ID3D12PipelineState* pipeline_state_triangle_ = nullptr; - ID3D12PipelineState* pipeline_state_line_ = nullptr; + ID3D12PipelineState* pipeline_triangle_ = nullptr; + ID3D12PipelineState* pipeline_line_ = nullptr; ID3D12DescriptorHeap* sampler_heap_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; diff --git a/src/xenia/ui/d3d12/d3d12_util.cc b/src/xenia/ui/d3d12/d3d12_util.cc index 710d3b6db..caea2b296 100644 --- a/src/xenia/ui/d3d12/d3d12_util.cc +++ b/src/xenia/ui/d3d12/d3d12_util.cc @@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature( return root_signature; } -ID3D12PipelineState* CreateComputePipelineState( +ID3D12PipelineState* CreateComputePipeline( ID3D12Device* device, const void* shader, size_t shader_size, ID3D12RootSignature* root_signature) { D3D12_COMPUTE_PIPELINE_STATE_DESC desc; diff --git a/src/xenia/ui/d3d12/d3d12_util.h b/src/xenia/ui/d3d12/d3d12_util.h index 5bce23568..062177218 100644 --- a/src/xenia/ui/d3d12/d3d12_util.h +++ b/src/xenia/ui/d3d12/d3d12_util.h @@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) { ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider, const D3D12_ROOT_SIGNATURE_DESC& desc); -ID3D12PipelineState* CreateComputePipelineState( - ID3D12Device* device, const void* shader, size_t shader_size, - ID3D12RootSignature* root_signature); +ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device, + const void* shader, + size_t shader_size, + ID3D12RootSignature* root_signature); constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) { switch (element_size_bytes_log2) { diff --git a/src/xenia/ui/graphics_upload_buffer_pool.cc b/src/xenia/ui/graphics_upload_buffer_pool.cc index 2a780b0c9..5eb04fba3 100644 --- a/src/xenia/ui/graphics_upload_buffer_pool.cc +++ b/src/xenia/ui/graphics_upload_buffer_pool.cc @@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() { GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); assert_true(size <= page_size_); @@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out, size_t& size_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); size = std::min(size, page_size_);