From 6b988d43c79b32b99b88060a49b9b17f9e2c2218 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 16:43:18 +0300 Subject: [PATCH 01/12] [D3D12] Cleanup: pipeline state -> pipeline, other things --- .../gpu/d3d12/d3d12_command_processor.cc | 111 ++--- src/xenia/gpu/d3d12/d3d12_command_processor.h | 35 +- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 5 +- src/xenia/gpu/d3d12/d3d12_shader.h | 2 +- src/xenia/gpu/d3d12/deferred_command_list.cc | 5 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 422 ++++++++---------- src/xenia/gpu/d3d12/pipeline_cache.h | 68 ++- src/xenia/gpu/d3d12/primitive_converter.cc | 8 +- src/xenia/gpu/d3d12/render_target_cache.cc | 113 +++-- src/xenia/gpu/d3d12/render_target_cache.h | 27 +- src/xenia/gpu/d3d12/texture_cache.cc | 39 +- src/xenia/gpu/d3d12/texture_cache.h | 6 +- src/xenia/gpu/dxbc_shader_translator_fetch.cc | 4 +- src/xenia/gpu/shader.h | 37 +- src/xenia/gpu/shared_memory.h | 6 +- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 60 ++- src/xenia/ui/d3d12/d3d12_immediate_drawer.h | 4 +- src/xenia/ui/d3d12/d3d12_util.cc | 2 +- src/xenia/ui/d3d12/d3d12_util.h | 7 +- src/xenia/ui/graphics_upload_buffer_pool.cc | 4 +- 20 files changed, 475 insertions(+), 490 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index f0be8c50e..3338d5d9b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( sampler_count_vertex); return nullptr; } - root_signatures_bindful_.insert({index, root_signature}); + root_signatures_bindful_.emplace(index, root_signature); return root_signature; } @@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions( current_sample_positions_ = sample_positions; } -void D3D12CommandProcessor::SetComputePipelineState( - ID3D12PipelineState* pipeline_state) { - if (current_external_pipeline_state_ != 
pipeline_state) { - deferred_command_list_.D3DSetPipelineState(pipeline_state); - current_external_pipeline_state_ = pipeline_state; - current_cached_pipeline_state_ = nullptr; +void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) { + if (current_external_pipeline_ != pipeline) { + deferred_command_list_.D3DSetPipelineState(pipeline); + current_external_pipeline_ = pipeline; + current_cached_pipeline_ = nullptr; } } @@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const { } // Currently scaling is only supported with ROV. if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) { - return "Direct3D 12 - 2x"; + return "Direct3D 12 - ROV 2x"; } + // Rasterizer-ordered views are a feature very rarely used as of 2020 and + // that faces adoption complications (outside of Direct3D - on Vulkan - at + // least), but crucial to Xenia - raise awareness of its usage. + // https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319 + // "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I + // wasn't aware that Xenia D3D12 backend was using Raster Order Views + // feature" - oscarbg in that issue. + return "Direct3D 12 - ROV"; } return "Direct3D 12"; } @@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() { *this, *register_file_, bindless_resources_used_, edram_rov_used_, texture_cache_->IsResolutionScale2X() ? 2 : 1); if (!pipeline_cache_->Initialize()) { - XELOGE("Failed to initialize the graphics pipeline state cache"); + XELOGE("Failed to initialize the graphics pipeline cache"); return false; } @@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() { // Shut down binding - bindless descriptors may be owned by subsystems like // the texture cache. - // Root signatured are used by pipeline states, thus freed after the pipeline - // states. + // Root signatures are used by pipelines, thus freed after the pipelines. 
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); for (auto it : root_signatures_bindful_) { @@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, xenos::VertexShaderExportMode::kMultipass || (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back))) { - // All faces are culled - can't be expressed in the pipeline state. + // All faces are culled - can't be expressed in the pipeline. return true; } @@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, line_loop_closing_index = 0; } - // Update the textures - this may bind pipeline state objects. + // Update the textures - this may bind pipelines. uint32_t used_texture_mask = vertex_shader->GetUsedTextureMask() | (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); @@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, early_z = true; } - // Create the pipeline state object if needed and bind it. - void* pipeline_state_handle; + // Create the pipeline if needed and bind it. + void* pipeline_handle; ID3D12RootSignature* root_signature; if (!pipeline_cache_->ConfigurePipeline( vertex_shader, pixel_shader, primitive_type_converted, indexed ? 
index_buffer_info->format : xenos::IndexFormat::kInt16, - early_z, pipeline_render_targets, &pipeline_state_handle, + early_z, pipeline_render_targets, &pipeline_handle, &root_signature)) { return false; } - if (current_cached_pipeline_state_ != pipeline_state_handle) { + if (current_cached_pipeline_ != pipeline_handle) { deferred_command_list_.SetPipelineStateHandle( - reinterpret_cast(pipeline_state_handle)); - current_cached_pipeline_state_ = pipeline_state_handle; - current_external_pipeline_state_ = nullptr; + reinterpret_cast(pipeline_handle)); + current_cached_pipeline_ = pipeline_handle; + current_external_pipeline_ = nullptr; } // Update viewport, scissor, blend factor and stencil reference. @@ -2005,14 +2011,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, } // Must not call anything that can change the descriptor heap from now on! - // Ensure vertex and index buffers are resident and draw. + // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture - // validity will be tracked. + // validity is tracked. 
uint64_t vertex_buffers_resident[2] = {}; - for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { uint32_t vfetch_index = vertex_binding.fetch_constant; if (vertex_buffers_resident[vfetch_index >> 6] & - (1ull << (vfetch_index & 63))) { + (uint64_t(1) << (vfetch_index & 63))) { continue; } const auto& vfetch_constant = regs.Get( @@ -2045,7 +2052,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } - vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); } // Gather memexport ranges and ensure the heaps for them are resident, and @@ -2517,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { submission_open_ = true; // Start a new deferred command list - will submit it to the real one in the - // end of the submission (when async pipeline state object creation requests - // are fulfilled). + // end of the submission (when async pipeline creation requests are + // fulfilled). deferred_command_list_.Reset(); // Reset cached state of the command list. 
@@ -2527,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { ff_blend_factor_update_needed_ = true; ff_stencil_ref_update_needed_ = true; current_sample_positions_ = xenos::MsaaSamples::k1X; - current_cached_pipeline_state_ = nullptr; - current_external_pipeline_state_ = nullptr; + current_cached_pipeline_ = nullptr; + current_external_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; if (bindless_resources_used_) { @@ -2724,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { } bool D3D12CommandProcessor::CanEndSubmissionImmediately() const { - return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates(); + return !submission_open_ || !pipeline_cache_->IsCreatingPipelines(); } void D3D12CommandProcessor::ClearCommandAllocatorCache() { @@ -2745,12 +2753,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { } void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; + // Window parameters. 
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: @@ -2846,14 +2854,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { scissor.right = pa_sc_window_scissor_br.br_x; scissor.bottom = pa_sc_window_scissor_br.br_y; if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor.left = - std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.top = - std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); - scissor.right = - std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.bottom = - std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -2915,12 +2923,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t line_loop_closing_index, xenos::Endian index_endian, uint32_t used_texture_mask, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_point_minmax = regs.Get(); @@ -3103,14 +3110,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index; system_constants_.line_loop_closing_index = 
line_loop_closing_index; - // Vertex index offset. - dirty |= system_constants_.vertex_base_index != vgt_indx_offset; - system_constants_.vertex_base_index = vgt_indx_offset; - // Index or tessellation edge factor buffer endianness. dirty |= system_constants_.vertex_index_endian != index_endian; system_constants_.vertex_index_endian = index_endian; + // Vertex index offset. + dirty |= system_constants_.vertex_base_index != vgt_indx_offset; + system_constants_.vertex_base_index = vgt_indx_offset; + // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { @@ -3574,7 +3581,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_vertex.float_bitmap[i]; // If no float constants at all, we can reuse any buffer for them, so not // invalidating. - if (float_constant_map_vertex.float_count != 0) { + if (float_constant_count_vertex) { cbuffer_binding_float_vertex_.up_to_date = false; } } @@ -3589,7 +3596,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_pixel.float_bitmap[i]) { current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; - if (float_constant_map_pixel.float_count != 0) { + if (float_constant_count_pixel) { cbuffer_binding_float_pixel_.up_to_date = false; } } @@ -3889,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings( sampler_parameters, provider.OffsetSamplerDescriptor( sampler_bindless_heap_cpu_start_, sampler_index)); - texture_cache_bindless_sampler_map_.insert( - {sampler_parameters.value, sampler_index}); + texture_cache_bindless_sampler_map_.emplace( + sampler_parameters.value, sampler_index); } current_sampler_bindless_indices_vertex_[j] = sampler_index; } @@ -3921,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings( sampler_parameters, provider.OffsetSamplerDescriptor( sampler_bindless_heap_cpu_start_, sampler_index)); - texture_cache_bindless_sampler_map_.insert( - {sampler_parameters.value, sampler_index}); + 
texture_cache_bindless_sampler_map_.emplace( + sampler_parameters.value, sampler_index); } current_sampler_bindless_indices_pixel_[j] = sampler_index; } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 0b5a80e68..42c1e0092 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor { // render targets or copying to depth render targets. void SetSamplePositions(xenos::MsaaSamples sample_positions); - // Returns a pipeline state object with deferred creation by its handle. May - // return nullptr if failed to create the pipeline state object. - inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( - void* handle) const { - return pipeline_cache_->GetD3D12PipelineStateByHandle(handle); + // Returns a pipeline with deferred creation by its handle. May return nullptr + // if failed to create the pipeline. + inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + return pipeline_cache_->GetD3D12PipelineByHandle(handle); } - // Sets the current pipeline state to a compute one. This is for cache - // invalidation primarily. A submission must be open. - void SetComputePipelineState(ID3D12PipelineState* pipeline_state); + // Sets the current pipeline to a compute one. This is for cache invalidation + // primarily. A submission must be open. + void SetComputePipeline(ID3D12PipelineState* pipeline); - // For the pipeline state cache to call when binding layout UIDs may be - // reused. + // For the pipeline cache to call when binding layout UIDs may be reused. void NotifyShaderBindingsLayoutUIDsInvalidated(); // Returns the text to display in the GPU backend name in the window title. 
@@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor { bool EndSubmission(bool is_swap); // Checks if ending a submission right now would not cause potentially more // delay than it would reduce by making the GPU start working earlier - such - // as when there are unfinished graphics pipeline state creation requests that - // would need to be fulfilled before actually submitting the command list. + // as when there are unfinished graphics pipeline creation requests that would + // need to be fulfilled before actually submitting the command list. bool CanEndSubmissionImmediately() const; bool AwaitAllQueueOperationsCompletion() { CheckSubmissionFence(submission_current_); @@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor { // Current SSAA sample positions (to be updated by the render target cache). xenos::MsaaSamples current_sample_positions_; - // Currently bound pipeline state, either a graphics pipeline state object - // from the pipeline state cache (with potentially deferred creation - - // current_external_pipeline_state_ is nullptr in this case) or a non-Xenos - // graphics or compute pipeline state object (current_cached_pipeline_state_ - // is nullptr in this case). - void* current_cached_pipeline_state_; - ID3D12PipelineState* current_external_pipeline_state_; + // Currently bound pipeline, either a graphics pipeline from the pipeline + // cache (with potentially deferred creation - current_external_pipeline_ is + // nullptr in this case) or a non-Xenos graphics or compute pipeline + // (current_cached_pipeline_ is nullptr in this case). + void* current_cached_pipeline_; + ID3D12PipelineState* current_external_pipeline_; // Currently bound graphics root signature. 
ID3D12RootSignature* current_graphics_root_signature_; diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index e50bbbaac..d32f223ce 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, stretch_pipeline_desc.SampleDesc.Count = 1; if (FAILED(device->CreateGraphicsPipelineState( &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) { - XELOGE("Failed to create the front buffer stretch pipeline state"); + XELOGE("Failed to create the front buffer stretch pipeline"); stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_ = nullptr; stretch_root_signature_->Release(); @@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, if (FAILED(device->CreateGraphicsPipelineState( &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) { XELOGE( - "Failed to create the gamma-correcting front buffer stretch " - "pipeline state"); + "Failed to create the gamma-correcting front buffer stretch pipeline"); stretch_pipeline_->Release(); stretch_pipeline_ = nullptr; stretch_gamma_root_signature_->Release(); diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index 7eb4ac6e0..c24d6a00a 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -85,7 +85,7 @@ class D3D12Shader : public Shader { return sampler_bindings_.data(); } - // For owning subsystems like the pipeline state cache, accessors for unique + // For owning subsystems like the pipeline cache, accessors for unique // identifiers (used instead of hashes to make sure collisions can't happen) // of binding layouts used by the shader, for invalidation if a shader with an // incompatible layout was bound. 
diff --git a/src/xenia/gpu/d3d12/deferred_command_list.cc b/src/xenia/gpu/d3d12/deferred_command_list.cc index 2b013e8ad..eb8d8922e 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.cc +++ b/src/xenia/gpu/d3d12/deferred_command_list.cc @@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list, } } break; case Command::kSetPipelineStateHandle: { - current_pipeline_state = - command_processor_.GetD3D12PipelineStateByHandle( - *reinterpret_cast(stream)); + current_pipeline_state = command_processor_.GetD3D12PipelineByHandle( + *reinterpret_cast(stream)); if (current_pipeline_state) { command_list->SetPipelineState(current_pipeline_state); } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 3a9f609d3..b2db2654e 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -43,10 +43,10 @@ DEFINE_bool( "D3D12"); DEFINE_int32( d3d12_pipeline_creation_threads, -1, - "Number of threads used for graphics pipeline state object creation. -1 to " - "calculate automatically (75% of logical CPU cores), a positive number to " - "specify the number of threads explicitly (up to the number of logical CPU " - "cores), 0 to disable multithreaded pipeline state object creation.", + "Number of threads used for graphics pipeline creation. -1 to calculate " + "automatically (75% of logical CPU cores), a positive number to specify " + "the number of threads explicitly (up to the number of logical CPU cores), " + "0 to disable multithreaded pipeline creation.", "D3D12"); DEFINE_bool(d3d12_tessellation_wireframe, false, "Display tessellated surfaces as wireframe for debugging.", @@ -125,8 +125,8 @@ bool PipelineCache::Initialize() { logical_processor_count = 6; } // Initialize creation thread synchronization data even if not using creation - // threads because they may be used anyway to create pipeline state objects - // from the storage. 
+ // threads because they may be used anyway to create pipelines from the + // storage. creation_threads_busy_ = 0; creation_completion_event_ = xe::threading::Event::CreateManualResetEvent(true); @@ -145,7 +145,7 @@ bool PipelineCache::Initialize() { for (size_t i = 0; i < creation_thread_count; ++i) { std::unique_ptr creation_thread = xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); }); - creation_thread->set_name("D3D12 Pipeline States"); + creation_thread->set_name("D3D12 Pipelines"); creation_threads_.push_back(std::move(creation_thread)); } } @@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) { } ShutdownShaderStorage(); - // Remove references to the current pipeline state object. - current_pipeline_state_ = nullptr; + // Remove references to the current pipeline. + current_pipeline_ = nullptr; if (!creation_threads_.empty()) { - // Empty the pipeline state object creation queue and make sure there are no - // threads currently creating pipeline state objects because pipeline states - // are going to be deleted. + // Empty the pipeline creation queue and make sure there are no threads + // currently creating pipelines because pipelines are going to be deleted. bool await_creation_completion_event = false; { std::lock_guard lock(creation_request_lock_); @@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) { } } - // Destroy all pipeline state objects. - for (auto it : pipeline_states_) { + // Destroy all pipelines. + for (auto it : pipelines_) { it.second->state->Release(); delete it.second; } - pipeline_states_.clear(); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0); + pipelines_.clear(); + COUNT_profile_set("gpu/pipeline_cache/pipelines", 0); // Destroy all shaders. 
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated(); @@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) { } texture_binding_layout_map_.clear(); texture_binding_layouts_.clear(); - for (auto it : shader_map_) { + for (auto it : shaders_) { delete it.second; } - shader_map_.clear(); + shaders_.clear(); if (reinitialize_shader_storage) { InitializeShaderStorage(shader_storage_root, shader_storage_title_id, @@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage( } size_t ucode_byte_count = shader_header.ucode_dword_count * sizeof(uint32_t); - if (shader_map_.find(shader_header.ucode_data_hash) != - shader_map_.end()) { + if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) { // Already added - usually shaders aren't added without the intention of // translating them imminently, so don't do additional checks to // actually ensure that translation happens right now (they would cause @@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage( D3D12Shader* shader = new D3D12Shader(shader_header.type, ucode_data_hash, ucode_dwords.data(), shader_header.ucode_dword_count); - shader_map_.insert({ucode_data_hash, shader}); + shaders_.emplace(ucode_data_hash, shader); // Create new threads if the currently existing threads can't keep up with // file reading, but not more than the number of logical processors minus // one. @@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage( } shader_translation_threads.clear(); for (D3D12Shader* shader : shaders_failed_to_translate) { - shader_map_.erase(shader->ucode_data_hash()); + shaders_.erase(shader->ucode_data_hash()); delete shader; } } @@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage( } // 'DXRO' or 'DXRT'. - const uint32_t pipeline_state_storage_magic_api = + const uint32_t pipeline_storage_magic_api = edram_rov_used_ ? 0x4F525844 : 0x54525844; - // Initialize the pipeline state storage stream. 
- uint64_t pipeline_state_storage_initialization_start_ = + // Initialize the pipeline storage stream. + uint64_t pipeline_storage_initialization_start_ = xe::Clock::QueryHostTickCount(); - auto pipeline_state_storage_file_path = + auto pipeline_storage_file_path = shader_storage_shareable_root / fmt::format("{:08X}.{}.d3d12.xpso", title_id, edram_rov_used_ ? "rov" : "rtv"); - pipeline_state_storage_file_ = - xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b"); - if (!pipeline_state_storage_file_) { + pipeline_storage_file_ = + xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b"); + if (!pipeline_storage_file_) { XELOGE( - "Failed to open the Direct3D 12 pipeline state description storage " - "file for writing, persistent shader storage will be disabled: {}", - xe::path_to_utf8(pipeline_state_storage_file_path)); + "Failed to open the Direct3D 12 pipeline description storage file for " + "writing, persistent shader storage will be disabled: {}", + xe::path_to_utf8(pipeline_storage_file_path)); fclose(shader_storage_file_); shader_storage_file_ = nullptr; return; } - pipeline_state_storage_file_flush_needed_ = false; + pipeline_storage_file_flush_needed_ = false; // 'XEPS'. 
- const uint32_t pipeline_state_storage_magic = 0x53504558; + const uint32_t pipeline_storage_magic = 0x53504558; struct { uint32_t magic; uint32_t magic_api; uint32_t version_swapped; - } pipeline_state_storage_file_header; - if (fread(&pipeline_state_storage_file_header, - sizeof(pipeline_state_storage_file_header), 1, - pipeline_state_storage_file_) && - pipeline_state_storage_file_header.magic == - pipeline_state_storage_magic && - pipeline_state_storage_file_header.magic_api == - pipeline_state_storage_magic_api && - xe::byte_swap(pipeline_state_storage_file_header.version_swapped) == + } pipeline_storage_file_header; + if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), + 1, pipeline_storage_file_) && + pipeline_storage_file_header.magic == pipeline_storage_magic && + pipeline_storage_file_header.magic_api == pipeline_storage_magic_api && + xe::byte_swap(pipeline_storage_file_header.version_swapped) == PipelineDescription::kVersion) { - uint64_t pipeline_state_storage_valid_bytes = - sizeof(pipeline_state_storage_file_header); - // Enqueue pipeline state descriptions written by previous Xenia executions - // until the end of the file or until a corrupted one is detected. - xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END); - int64_t pipeline_state_storage_told_end = - xe::filesystem::Tell(pipeline_state_storage_file_); - size_t pipeline_state_storage_told_count = - size_t(pipeline_state_storage_told_end >= - int64_t(pipeline_state_storage_valid_bytes) - ? 
(uint64_t(pipeline_state_storage_told_end) - - pipeline_state_storage_valid_bytes) / - sizeof(PipelineStoredDescription) - : 0); - if (pipeline_state_storage_told_count && - xe::filesystem::Seek(pipeline_state_storage_file_, - int64_t(pipeline_state_storage_valid_bytes), - SEEK_SET)) { + uint64_t pipeline_storage_valid_bytes = + sizeof(pipeline_storage_file_header); + // Enqueue pipeline descriptions written by previous Xenia executions until + // the end of the file or until a corrupted one is detected. + xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END); + int64_t pipeline_storage_told_end = + xe::filesystem::Tell(pipeline_storage_file_); + size_t pipeline_storage_told_count = size_t( + pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes) + ? (uint64_t(pipeline_storage_told_end) - + pipeline_storage_valid_bytes) / + sizeof(PipelineStoredDescription) + : 0); + if (pipeline_storage_told_count && + xe::filesystem::Seek(pipeline_storage_file_, + int64_t(pipeline_storage_valid_bytes), SEEK_SET)) { std::vector pipeline_stored_descriptions; - pipeline_stored_descriptions.resize(pipeline_state_storage_told_count); - pipeline_stored_descriptions.resize(fread( - pipeline_stored_descriptions.data(), - sizeof(PipelineStoredDescription), pipeline_state_storage_told_count, - pipeline_state_storage_file_)); + pipeline_stored_descriptions.resize(pipeline_storage_told_count); + pipeline_stored_descriptions.resize( + fread(pipeline_stored_descriptions.data(), + sizeof(PipelineStoredDescription), pipeline_storage_told_count, + pipeline_storage_file_)); if (!pipeline_stored_descriptions.empty()) { // Launch additional creation threads to use all cores to create - // pipeline state objects faster. Will also be using the main thread, so - // minus 1. + // pipelines faster. Will also be using the main thread, so minus 1. 
size_t creation_thread_original_count = creation_threads_.size(); size_t creation_thread_needed_count = std::max(std::min(pipeline_stored_descriptions.size(), @@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage( {}, [this, creation_thread_index]() { CreationThread(creation_thread_index); }); - creation_thread->set_name("D3D12 Pipeline States Additional"); + creation_thread->set_name("D3D12 Pipelines"); creation_threads_.push_back(std::move(creation_thread)); } - size_t pipeline_states_created = 0; + size_t pipelines_created = 0; for (const PipelineStoredDescription& pipeline_stored_description : pipeline_stored_descriptions) { const PipelineDescription& pipeline_description = @@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage( 0) != pipeline_stored_description.description_hash) { break; } - pipeline_state_storage_valid_bytes += - sizeof(PipelineStoredDescription); - // Skip already known pipeline states - those have already been - // enqueued. - auto found_range = pipeline_states_.equal_range( + pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription); + // Skip already known pipelines - those have already been enqueued. 
+ auto found_range = pipelines_.equal_range( pipeline_stored_description.description_hash); - bool pipeline_state_found = false; + bool pipeline_found = false; for (auto it = found_range.first; it != found_range.second; ++it) { - PipelineState* found_pipeline_state = it->second; - if (!std::memcmp(&found_pipeline_state->description.description, + Pipeline* found_pipeline = it->second; + if (!std::memcmp(&found_pipeline->description.description, &pipeline_description, sizeof(pipeline_description))) { - pipeline_state_found = true; + pipeline_found = true; break; } } - if (pipeline_state_found) { + if (pipeline_found) { continue; } PipelineRuntimeDescription pipeline_runtime_description; auto vertex_shader_it = - shader_map_.find(pipeline_description.vertex_shader_hash); - if (vertex_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.vertex_shader_hash); + if (vertex_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.vertex_shader = vertex_shader_it->second; @@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage( } if (pipeline_description.pixel_shader_hash) { auto pixel_shader_it = - shader_map_.find(pipeline_description.pixel_shader_hash); - if (pixel_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.pixel_shader_hash); + if (pixel_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.pixel_shader = pixel_shader_it->second; @@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage( std::memcpy(&pipeline_runtime_description.description, &pipeline_description, sizeof(pipeline_description)); - PipelineState* new_pipeline_state = new PipelineState; - new_pipeline_state->state = nullptr; - std::memcpy(&new_pipeline_state->description, - &pipeline_runtime_description, + Pipeline* new_pipeline = new Pipeline; + new_pipeline->state = nullptr; + std::memcpy(&new_pipeline->description, &pipeline_runtime_description, sizeof(pipeline_runtime_description)); - 
pipeline_states_.insert( - std::make_pair(pipeline_stored_description.description_hash, - new_pipeline_state)); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", - pipeline_states_.size()); + pipelines_.emplace(pipeline_stored_description.description_hash, + new_pipeline); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); if (!creation_threads_.empty()) { // Submit the pipeline for creation to any available thread. { std::lock_guard lock(creation_request_lock_); - creation_queue_.push_back(new_pipeline_state); + creation_queue_.push_back(new_pipeline); } creation_request_cond_.notify_one(); } else { - new_pipeline_state->state = - CreateD3D12PipelineState(pipeline_runtime_description); + new_pipeline->state = + CreateD3D12Pipeline(pipeline_runtime_description); } - ++pipeline_states_created; + ++pipelines_created; } - CreateQueuedPipelineStatesOnProcessorThread(); + CreateQueuedPipelinesOnProcessorThread(); if (creation_threads_.size() > creation_thread_original_count) { { std::lock_guard lock(creation_request_lock_); creation_threads_shutdown_from_ = creation_thread_original_count; // Assuming the queue is empty because of - // CreateQueuedPipelineStatesOnProcessorThread. + // CreateQueuedPipelinesOnProcessorThread. 
} creation_request_cond_.notify_all(); while (creation_threads_.size() > creation_thread_original_count) { @@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage( } } XELOGGPU( - "Created {} graphics pipeline state objects from the storage in {} " - "milliseconds", - pipeline_states_created, + "Created {} graphics pipelines from the storage in {} milliseconds", + pipelines_created, (xe::Clock::QueryHostTickCount() - - pipeline_state_storage_initialization_start_) * + pipeline_storage_initialization_start_) * 1000 / xe::Clock::QueryHostTickFrequency()); } } - xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, - pipeline_state_storage_valid_bytes); + xe::filesystem::TruncateStdioFile(pipeline_storage_file_, + pipeline_storage_valid_bytes); } else { - xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0); - pipeline_state_storage_file_header.magic = pipeline_state_storage_magic; - pipeline_state_storage_file_header.magic_api = - pipeline_state_storage_magic_api; - pipeline_state_storage_file_header.version_swapped = + xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0); + pipeline_storage_file_header.magic = pipeline_storage_magic; + pipeline_storage_file_header.magic_api = pipeline_storage_magic_api; + pipeline_storage_file_header.version_swapped = xe::byte_swap(PipelineDescription::kVersion); - fwrite(&pipeline_state_storage_file_header, - sizeof(pipeline_state_storage_file_header), 1, - pipeline_state_storage_file_); + fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), + 1, pipeline_storage_file_); } shader_storage_root_ = storage_root; @@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage( // Start the storage writing thread. 
storage_write_flush_shaders_ = false; - storage_write_flush_pipeline_states_ = false; + storage_write_flush_pipelines_ = false; storage_write_thread_shutdown_ = false; storage_write_thread_ = xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); }); @@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() { storage_write_thread_.reset(); } storage_write_shader_queue_.clear(); - storage_write_pipeline_state_queue_.clear(); + storage_write_pipeline_queue_.clear(); - if (pipeline_state_storage_file_) { - fclose(pipeline_state_storage_file_); - pipeline_state_storage_file_ = nullptr; - pipeline_state_storage_file_flush_needed_ = false; + if (pipeline_storage_file_) { + fclose(pipeline_storage_file_); + pipeline_storage_file_ = nullptr; + pipeline_storage_file_flush_needed_ = false; } if (shader_storage_file_) { @@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() { void PipelineCache::EndSubmission() { if (shader_storage_file_flush_needed_ || - pipeline_state_storage_file_flush_needed_) { + pipeline_storage_file_flush_needed_) { { std::lock_guard lock(storage_write_request_lock_); if (shader_storage_file_flush_needed_) { storage_write_flush_shaders_ = true; } - if (pipeline_state_storage_file_flush_needed_) { - storage_write_flush_pipeline_states_ = true; + if (pipeline_storage_file_flush_needed_) { + storage_write_flush_pipelines_ = true; } } storage_write_request_cond_.notify_one(); shader_storage_file_flush_needed_ = false; - pipeline_state_storage_file_flush_needed_ = false; + pipeline_storage_file_flush_needed_ = false; } if (!creation_threads_.empty()) { - CreateQueuedPipelineStatesOnProcessorThread(); - // Await creation of all queued pipeline state objects. + CreateQueuedPipelinesOnProcessorThread(); + // Await creation of all queued pipelines. 
bool await_creation_completion_event; { std::lock_guard lock(creation_request_lock_); // Assuming the creation queue is already empty (because the processor - // thread also worked on creating the leftover pipeline state objects), so - // only check if there are threads with pipeline state objects currently - // being created. + // thread also worked on creating the leftover pipelines), so only check + // if there are threads with pipelines currently being created. await_creation_completion_event = creation_threads_busy_ != 0; if (await_creation_completion_event) { creation_completion_event_->Reset(); @@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() { } } -bool PipelineCache::IsCreatingPipelineStates() { +bool PipelineCache::IsCreatingPipelines() { if (creation_threads_.empty()) { return false; } @@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, uint32_t dword_count) { // Hash the input memory and lookup the shader. uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { // Shader has been previously loaded. return it->second; } @@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, // again. D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, dword_count); - shader_map_.insert({data_hash, shader}); + shaders_.emplace(data_hash, shader); return shader; } @@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() const { // If the values this functions returns are changed, INVALIDATE THE SHADER - // STORAGE (increase kVersion for BOTH shaders and pipeline states)! 
The - // exception is when the function originally returned "unsupported", but - // started to return a valid value (in this case the shader wouldn't be cached - // in the first place). Otherwise games will not be able to locate shaders for - // draws for which the host vertex shader type has changed! + // STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception + // is when the function originally returned "unsupported", but started to + // return a valid value (in this case the shader wouldn't be cached in the + // first place). Otherwise games will not be able to locate shaders for draws + // for which the host vertex shader type has changed! const auto& regs = register_file_; auto vgt_draw_initiator = regs.Get(); if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, @@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline( xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], - void** pipeline_state_handle_out, - ID3D12RootSignature** root_signature_out) { + void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES - assert_not_null(pipeline_state_handle_out); + assert_not_null(pipeline_handle_out); assert_not_null(root_signature_out); PipelineRuntimeDescription runtime_description; @@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline( } PipelineDescription& description = runtime_description.description; - if (current_pipeline_state_ != nullptr && - !std::memcmp(&current_pipeline_state_->description.description, - &description, sizeof(description))) { - *pipeline_state_handle_out = current_pipeline_state_; + if (current_pipeline_ != nullptr && + !std::memcmp(&current_pipeline_->description.description, &description, + sizeof(description))) { + *pipeline_handle_out = current_pipeline_; *root_signature_out =
runtime_description.root_signature; return true; } - // Find an existing pipeline state object in the cache. + // Find an existing pipeline in the cache. uint64_t hash = XXH64(&description, sizeof(description), 0); - auto found_range = pipeline_states_.equal_range(hash); + auto found_range = pipelines_.equal_range(hash); for (auto it = found_range.first; it != found_range.second; ++it) { - PipelineState* found_pipeline_state = it->second; - if (!std::memcmp(&found_pipeline_state->description.description, - &description, sizeof(description))) { - current_pipeline_state_ = found_pipeline_state; - *pipeline_state_handle_out = found_pipeline_state; - *root_signature_out = found_pipeline_state->description.root_signature; + Pipeline* found_pipeline = it->second; + if (!std::memcmp(&found_pipeline->description.description, &description, + sizeof(description))) { + current_pipeline_ = found_pipeline; + *pipeline_handle_out = found_pipeline; + *root_signature_out = found_pipeline->description.root_signature; return true; } } @@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline( return false; } - PipelineState* new_pipeline_state = new PipelineState; - new_pipeline_state->state = nullptr; - std::memcpy(&new_pipeline_state->description, &runtime_description, + Pipeline* new_pipeline = new Pipeline; + new_pipeline->state = nullptr; + std::memcpy(&new_pipeline->description, &runtime_description, sizeof(runtime_description)); - pipeline_states_.insert(std::make_pair(hash, new_pipeline_state)); - COUNT_profile_set("gpu/pipeline_cache/pipeline_states", - pipeline_states_.size()); + pipelines_.emplace(hash, new_pipeline); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); if (!creation_threads_.empty()) { - // Submit the pipeline state object for creation to any available thread. + // Submit the pipeline for creation to any available thread. 
{ std::lock_guard lock(creation_request_lock_); - creation_queue_.push_back(new_pipeline_state); + creation_queue_.push_back(new_pipeline); } creation_request_cond_.notify_one(); } else { - new_pipeline_state->state = CreateD3D12PipelineState(runtime_description); + new_pipeline->state = CreateD3D12Pipeline(runtime_description); } - if (pipeline_state_storage_file_) { + if (pipeline_storage_file_) { assert_not_null(storage_write_thread_); - pipeline_state_storage_file_flush_needed_ = true; + pipeline_storage_file_flush_needed_ = true; { std::lock_guard lock(storage_write_request_lock_); - storage_write_pipeline_state_queue_.emplace_back(); + storage_write_pipeline_queue_.emplace_back(); PipelineStoredDescription& stored_description = - storage_write_pipeline_state_queue_.back(); + storage_write_pipeline_queue_.back(); stored_description.description_hash = hash; std::memcpy(&stored_description.description, &description, sizeof(description)); @@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline( storage_write_request_cond_.notify_all(); } - current_pipeline_state_ = new_pipeline_state; - *pipeline_state_handle_out = new_pipeline_state; + current_pipeline_ = new_pipeline; + *pipeline_handle_out = new_pipeline; *root_signature_out = runtime_description.root_signature; return true; } @@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader( std::memcpy( texture_binding_layouts_.data() + new_uid.vector_span_offset, texture_bindings, texture_binding_layout_bytes); - texture_binding_layout_map_.insert( - {texture_binding_layout_hash, new_uid}); + texture_binding_layout_map_.emplace(texture_binding_layout_hash, + new_uid); } } if (bindless_sampler_count) { @@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader( vector_bindless_sampler_layout[i] = sampler_bindings[i].bindless_descriptor_index; } - bindless_sampler_layout_map_.insert( - {bindless_sampler_layout_hash, new_uid}); + bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash, + new_uid); } } } @@ 
-1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription( /* 16 */ PipelineBlendFactor::kSrcAlphaSat, }; // Like kBlendFactorMap, but with color modes changed to alpha. Some - // pipeline state objects aren't created in Prey because a color mode is - // used for alpha. + // pipelines aren't created in Prey because a color mode is used for alpha. static const PipelineBlendFactor kBlendFactorAlphaMap[32] = { /* 0 */ PipelineBlendFactor::kZero, /* 1 */ PipelineBlendFactor::kOne, @@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription( return true; } -ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( +ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( const PipelineRuntimeDescription& runtime_description) { const PipelineDescription& description = runtime_description.description; if (runtime_description.pixel_shader != nullptr) { - XELOGGPU( - "Creating graphics pipeline state with VS {:016X}" - ", PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}", + runtime_description.vertex_shader->ucode_data_hash(), + runtime_description.pixel_shader->ucode_data_hash()); } else { - XELOGGPU("Creating graphics pipeline state with VS {:016X}", + XELOGGPU("Creating graphics pipeline with VS {:016X}", runtime_description.vertex_shader->ucode_data_hash()); } @@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( } } - // Create the pipeline state object. + // Create the D3D12 pipeline state object. 
auto device = command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); ID3D12PipelineState* state; if (FAILED(device->CreateGraphicsPipelineState(&state_desc, IID_PPV_ARGS(&state)))) { if (runtime_description.pixel_shader != nullptr) { - XELOGE( - "Failed to create graphics pipeline state with VS {:016X}" - ", PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}", + runtime_description.vertex_shader->ucode_data_hash(), + runtime_description.pixel_shader->ucode_data_hash()); } else { - XELOGE("Failed to create graphics pipeline state with VS {:016X}", + XELOGE("Failed to create graphics pipeline with VS {:016X}", runtime_description.vertex_shader->ucode_data_hash()); } return nullptr; @@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() { ucode_guest_endian.reserve(0xFFFF); bool flush_shaders = false; - bool flush_pipeline_states = false; + bool flush_pipelines = false; while (true) { if (flush_shaders) { @@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() { assert_not_null(shader_storage_file_); fflush(shader_storage_file_); } - if (flush_pipeline_states) { - flush_pipeline_states = false; - assert_not_null(pipeline_state_storage_file_); - fflush(pipeline_state_storage_file_); + if (flush_pipelines) { + flush_pipelines = false; + assert_not_null(pipeline_storage_file_); + fflush(pipeline_storage_file_); } std::pair shader_pair = {}; PipelineStoredDescription pipeline_description; - bool write_pipeline_state = false; + bool write_pipeline = false; { std::unique_lock lock(storage_write_request_lock_); if (storage_write_thread_shutdown_) { @@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() { storage_write_flush_shaders_ = false; flush_shaders = true; } - if (!storage_write_pipeline_state_queue_.empty()) { + if (!storage_write_pipeline_queue_.empty()) { 
std::memcpy(&pipeline_description, - &storage_write_pipeline_state_queue_.front(), + &storage_write_pipeline_queue_.front(), sizeof(pipeline_description)); - storage_write_pipeline_state_queue_.pop_front(); - write_pipeline_state = true; - } else if (storage_write_flush_pipeline_states_) { - storage_write_flush_pipeline_states_ = false; - flush_pipeline_states = true; + storage_write_pipeline_queue_.pop_front(); + write_pipeline = true; + } else if (storage_write_flush_pipelines_) { + storage_write_flush_pipelines_ = false; + flush_pipelines = true; } - if (!shader_pair.first && !write_pipeline_state) { + if (!shader_pair.first && !write_pipeline) { storage_write_request_cond_.wait(lock); continue; } @@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() { } } - if (write_pipeline_state) { - assert_not_null(pipeline_state_storage_file_); + if (write_pipeline) { + assert_not_null(pipeline_storage_file_); fwrite(&pipeline_description, sizeof(pipeline_description), 1, - pipeline_state_storage_file_); + pipeline_storage_file_); } } } void PipelineCache::CreationThread(size_t thread_index) { while (true) { - PipelineState* pipeline_state_to_create = nullptr; + Pipeline* pipeline_to_create = nullptr; // Check if need to shut down or set the completion event and dequeue the - // pipeline state if there is any. + // pipeline if there is any. { std::unique_lock lock(creation_request_lock_); if (thread_index >= creation_threads_shutdown_from_ || creation_queue_.empty()) { if (creation_completion_set_event_ && creation_threads_busy_ == 0) { - // Last pipeline state object in the queue created - signal the event - // if requested. + // Last pipeline in the queue created - signal the event if requested. 
creation_completion_set_event_ = false; creation_completion_event_->Set(); } @@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) { creation_request_cond_.wait(lock); continue; } - // Take the pipeline state from the queue and increment the busy thread - // count until the pipeline state object is created - other threads must - // be able to dequeue requests, but can't set the completion event until - // the pipeline state objects are fully created (rather than just started - // creating). - pipeline_state_to_create = creation_queue_.front(); + // Take the pipeline from the queue and increment the busy thread count + // until the pipeline is created - other threads must be able to dequeue + // requests, but can't set the completion event until the pipelines are + // fully created (rather than just started creating). + pipeline_to_create = creation_queue_.front(); creation_queue_.pop_front(); ++creation_threads_busy_; } // Create the D3D12 pipeline state object. - pipeline_state_to_create->state = - CreateD3D12PipelineState(pipeline_state_to_create->description); + pipeline_to_create->state = + CreateD3D12Pipeline(pipeline_to_create->description); - // Pipeline state object created - the thread is not busy anymore, safe to - // set the completion event if needed (at the next iteration, or in some - // other thread). + // Pipeline created - the thread is not busy anymore, safe to set the + // completion event if needed (at the next iteration, or in some other + // thread). 
{ std::lock_guard lock(creation_request_lock_); --creation_threads_busy_; @@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) { } } -void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() { +void PipelineCache::CreateQueuedPipelinesOnProcessorThread() { assert_false(creation_threads_.empty()); while (true) { - PipelineState* pipeline_state_to_create; + Pipeline* pipeline_to_create; { std::lock_guard lock(creation_request_lock_); if (creation_queue_.empty()) { break; } - pipeline_state_to_create = creation_queue_.front(); + pipeline_to_create = creation_queue_.front(); creation_queue_.pop_front(); } - pipeline_state_to_create->state = - CreateD3D12PipelineState(pipeline_state_to_create->description); + pipeline_to_create->state = + CreateD3D12Pipeline(pipeline_to_create->description); } } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index cdc6ed5f3..ee7f0a7de 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -29,6 +29,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" namespace xe { namespace gpu { @@ -54,7 +55,7 @@ class PipelineCache { void ShutdownShaderStorage(); void EndSubmission(); - bool IsCreatingPipelineStates(); + bool IsCreatingPipelines(); D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count); @@ -73,14 +74,12 @@ class PipelineCache { xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], - void** pipeline_state_handle_out, - ID3D12RootSignature** root_signature_out); + void** pipeline_handle_out, ID3D12RootSignature** root_signature_out); - // Returns a pipeline state object with deferred creation by its handle. 
May - // return nullptr if failed to create the pipeline state object. - inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( - void* handle) const { - return reinterpret_cast(handle)->state; + // Returns a pipeline with deferred creation by its handle. May return nullptr + // if failed to create the pipeline. + inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + return reinterpret_cast(handle)->state; } private: @@ -237,7 +236,7 @@ class PipelineCache { const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineRuntimeDescription& runtime_description_out); - ID3D12PipelineState* CreateD3D12PipelineState( + ID3D12PipelineState* CreateD3D12Pipeline( const PipelineRuntimeDescription& runtime_description); D3D12CommandProcessor& command_processor_; @@ -255,9 +254,9 @@ class PipelineCache { IDxcUtils* dxc_utils_ = nullptr; IDxcCompiler* dxc_compiler_ = nullptr; - // All loaded shaders mapped by their guest hash key. + // Ucode hash -> shader. std::unordered_map> - shader_map_; + shaders_; struct LayoutUID { size_t uid; @@ -285,21 +284,20 @@ class PipelineCache { // Xenos pixel shader provided. std::vector depth_only_pixel_shader_; - struct PipelineState { + struct Pipeline { // nullptr if creation has failed. ID3D12PipelineState* state; PipelineRuntimeDescription description; }; - // All previously generated pipeline state objects identified by hash and the - // description. - std::unordered_multimap> - pipeline_states_; + pipelines_; - // Previously used pipeline state object. This matches our current state - // settings and allows us to quickly(ish) reuse the pipeline state if no - // registers have changed. - PipelineState* current_pipeline_state_ = nullptr; + // Previously used pipeline. This matches our current state settings and + // allows us to quickly(ish) reuse the pipeline if no registers have been + // changed. + Pipeline* current_pipeline_ = nullptr; // Currently open shader storage path. 
std::filesystem::path shader_storage_root_; @@ -309,10 +307,9 @@ class PipelineCache { FILE* shader_storage_file_ = nullptr; bool shader_storage_file_flush_needed_ = false; - // Pipeline state storage output stream, for preload in the next emulator - // runs. - FILE* pipeline_state_storage_file_ = nullptr; - bool pipeline_state_storage_file_flush_needed_ = false; + // Pipeline storage output stream, for preload in the next emulator runs. + FILE* pipeline_storage_file_ = nullptr; + bool pipeline_storage_file_flush_needed_ = false; // Thread for asynchronous writing to the storage streams. void StorageWriteThread(); @@ -322,28 +319,27 @@ class PipelineCache { // thread is notified about its change via storage_write_request_cond_. std::deque> storage_write_shader_queue_; - std::deque storage_write_pipeline_state_queue_; + std::deque storage_write_pipeline_queue_; bool storage_write_flush_shaders_ = false; - bool storage_write_flush_pipeline_states_ = false; + bool storage_write_flush_pipelines_ = false; bool storage_write_thread_shutdown_ = false; std::unique_ptr storage_write_thread_; - // Pipeline state object creation threads. + // Pipeline creation threads. void CreationThread(size_t thread_index); - void CreateQueuedPipelineStatesOnProcessorThread(); + void CreateQueuedPipelinesOnProcessorThread(); std::mutex creation_request_lock_; std::condition_variable creation_request_cond_; // Protected with creation_request_lock_, notify_one creation_request_cond_ // when set. - std::deque creation_queue_; - // Number of threads that are currently creating a pipeline state object - - // incremented when a pipeline state object is dequeued (the completion event - // can't be triggered before this is zero). Protected with - // creation_request_lock_. + std::deque creation_queue_; + // Number of threads that are currently creating a pipeline - incremented when + // a pipeline is dequeued (the completion event can't be triggered before this + // is zero). 
Protected with creation_request_lock_. size_t creation_threads_busy_ = 0; - // Manual-reset event set when the last queued pipeline state object is - // created and there are no more pipeline state objects to create. This is - // triggered by the thread creating the last pipeline state object. + // Manual-reset event set when the last queued pipeline is created and there + // are no more pipelines to create. This is triggered by the thread creating + // the last pipeline. std::unique_ptr creation_completion_event_; // Whether setting the event on completion is queued. Protected with // creation_request_lock_, notify_one creation_request_cond_ when set. diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index d4f989123..90ba11ac5 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // again and again and exit. if (!conversion_needed || converted_index_count == 0) { converted_indices.gpu_address = 0; - converted_indices_cache_.insert( - std::make_pair(converted_indices.key.value, converted_indices)); + converted_indices_cache_.emplace(converted_indices.key.value, + converted_indices); memory_regions_used_ |= memory_regions_used_bits; return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty : ConversionResult::kConversionNotNeeded; @@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( // Cache and return the indices. 
converted_indices.gpu_address = gpu_address; - converted_indices_cache_.insert( - std::make_pair(converted_indices.key.value, converted_indices)); + converted_indices_cache_.emplace(converted_indices.key.value, + converted_indices); memory_regions_used_ |= memory_regions_used_bits; gpu_address_out = gpu_address; index_count_out = converted_index_count; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index b2c964a55..66ef2ba9f 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { return false; } - // Create the EDRAM load/store pipeline state objects. + // Create the EDRAM load/store pipelines. for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i]; - edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( + edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.load_shader, mode_info.load_shader_size, edram_load_store_root_signature_); - edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( + edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.store_shader, mode_info.store_shader_size, edram_load_store_root_signature_); if (edram_load_pipelines_[i] == nullptr || edram_store_pipelines_[i] == nullptr) { - XELOGE( - "Failed to create the EDRAM load/store pipeline states for mode {}", - i); + XELOGE("Failed to create the EDRAM load/store pipelines for mode {}", + i); Shutdown(); return false; } @@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { } } - // Create the resolve root signatures and pipeline state objects. + // Create the resolve root signatures and pipelines. D3D12_ROOT_PARAMETER resolve_root_parameters[3]; // Copying root signature. 
@@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { return false; } - // Copying pipeline state objects. + // Copying pipelines. uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1; for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount); ++i) { @@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { continue; } const auto& resolve_copy_shader = resolve_copy_shaders_[i]; - ID3D12PipelineState* resolve_copy_pipeline_state = - ui::d3d12::util::CreateComputePipelineState( + ID3D12PipelineState* resolve_copy_pipeline = + ui::d3d12::util::CreateComputePipeline( device, resolve_copy_shader.first, resolve_copy_shader.second, resolve_copy_root_signature_); - if (resolve_copy_pipeline_state == nullptr) { - XELOGE("Failed to create {} resolve copy pipeline state", + if (resolve_copy_pipeline == nullptr) { + XELOGE("Failed to create {} resolve copy pipeline", resolve_copy_shader_info.debug_name); } - resolve_copy_pipeline_state->SetName(reinterpret_cast( + resolve_copy_pipeline->SetName(reinterpret_cast( xe::to_utf16(resolve_copy_shader_info.debug_name).c_str())); - resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state; + resolve_copy_pipelines_[i] = resolve_copy_pipeline; } - // Clearing pipeline state objects. - resolve_clear_32bpp_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( - device, - resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs - : resolve_clear_32bpp_cs, - resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs) - : sizeof(resolve_clear_32bpp_cs), - resolve_clear_root_signature_); - if (resolve_clear_32bpp_pipeline_state_ == nullptr) { - XELOGE("Failed to create the 32bpp resolve clear pipeline state"); + // Clearing pipelines. + resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline( + device, + resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs + : resolve_clear_32bpp_cs, + resolution_scale_2x_ ? 
sizeof(resolve_clear_32bpp_2xres_cs) + : sizeof(resolve_clear_32bpp_cs), + resolve_clear_root_signature_); + if (resolve_clear_32bpp_pipeline_ == nullptr) { + XELOGE("Failed to create the 32bpp resolve clear pipeline"); Shutdown(); return false; } - resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp"); - resolve_clear_64bpp_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( - device, - resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs - : resolve_clear_64bpp_cs, - resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs) - : sizeof(resolve_clear_64bpp_cs), - resolve_clear_root_signature_); - if (resolve_clear_64bpp_pipeline_state_ == nullptr) { - XELOGE("Failed to create the 64bpp resolve clear pipeline state"); + resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp"); + resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline( + device, + resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs + : resolve_clear_64bpp_cs, + resolution_scale_2x_ ? 
sizeof(resolve_clear_64bpp_2xres_cs) + : sizeof(resolve_clear_64bpp_cs), + resolve_clear_root_signature_); + if (resolve_clear_64bpp_pipeline_ == nullptr) { + XELOGE("Failed to create the 64bpp resolve clear pipeline"); Shutdown(); return false; } - resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp"); + resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp"); if (!edram_rov_used_) { assert_false(resolution_scale_2x_); - resolve_clear_depth_24_32_pipeline_state_ = - ui::d3d12::util::CreateComputePipelineState( + resolve_clear_depth_24_32_pipeline_ = + ui::d3d12::util::CreateComputePipeline( device, resolve_clear_depth_24_32_cs, sizeof(resolve_clear_depth_24_32_cs), resolve_clear_root_signature_); - if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) { + if (resolve_clear_depth_24_32_pipeline_ == nullptr) { XELOGE( "Failed to create the 24-bit and 32-bit depth resolve clear pipeline " "state"); Shutdown(); return false; } - resolve_clear_64bpp_pipeline_state_->SetName( + resolve_clear_64bpp_pipeline_->SetName( L"Resolve Clear 24-bit & 32-bit Depth"); } @@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() { edram_snapshot_restore_pool_.reset(); ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_); - ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_); + ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_); ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_); - for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) { - ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]); + for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) { + 
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]); } ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_); for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { @@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory, 0, sizeof(copy_shader_constants) / sizeof(uint32_t), &copy_shader_constants, 0); } - command_processor_.SetComputePipelineState( - resolve_copy_pipeline_states_[size_t(copy_shader)]); + command_processor_.SetComputePipeline( + resolve_copy_pipelines_[size_t(copy_shader)]); command_processor_.SubmitBarriers(); command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1); @@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory, command_list.D3DSetComputeRoot32BitConstants( 0, sizeof(depth_clear_constants) / sizeof(uint32_t), &depth_clear_constants, 0); - command_processor_.SetComputePipelineState( - clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_ - : resolve_clear_32bpp_pipeline_state_); + command_processor_.SetComputePipeline( + clear_float32_depth ? resolve_clear_depth_24_32_pipeline_ + : resolve_clear_32bpp_pipeline_); command_processor_.SubmitBarriers(); command_list.D3DDispatch(clear_group_count.first, clear_group_count.second, 1); @@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory, 0, sizeof(color_clear_constants) / sizeof(uint32_t), &color_clear_constants, 0); } - command_processor_.SetComputePipelineState( + command_processor_.SetComputePipeline( resolve_info.color_edram_info.format_is_64bpp - ? resolve_clear_64bpp_pipeline_state_ - : resolve_clear_32bpp_pipeline_state_); + ? 
resolve_clear_64bpp_pipeline_ + : resolve_clear_32bpp_pipeline_); command_processor_.SubmitBarriers(); command_list.D3DDispatch(clear_group_count.first, clear_group_count.second, 1); @@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( render_target->footprints, nullptr, nullptr, &copy_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size); - render_targets_.insert(std::make_pair(key.value, render_target)); + render_targets_.emplace(key.value, render_target); COUNT_profile_set("gpu/render_target_cache/render_targets", render_targets_.size()); #if 0 @@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() { 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, render_target->key.format); - command_processor_.SetComputePipelineState( - edram_store_pipelines_[size_t(mode)]); + command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]); // 1 group per 80x16 samples. command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); @@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram( 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, render_target->key.format); - command_processor_.SetComputePipelineState( - edram_load_pipelines_[size_t(mode)]); + command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]); // 1 group per 80x16 samples. command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1); diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 0def0d25c..bc68c68a9 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -237,14 +237,13 @@ class D3D12CommandProcessor; // get each of the 4 host pixels for each sample. 
class RenderTargetCache { public: - // Direct3D 12 debug layer does some kaschenit-style trolling by giving errors - // that contradict each other when you use null RTV descriptors - if you set - // a valid format in RTVFormats in the pipeline state, it says that null - // descriptors can only be used if the format in the pipeline state is - // DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains - // that the format in the pipeline doesn't match the RTV format. So we have to - // make render target bindings consecutive and remap the output indices in - // pixel shaders. + // Direct3D 12 debug layer is giving errors that contradict each other when + // you use null RTV descriptors - if you set a valid format in RTVFormats in + // the pipeline state, it says that null descriptors can only be used if the + // format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if + // DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline + // state doesn't match the RTV format. So we have to make render target + // bindings consecutive and remap the output indices in pixel shaders. struct PipelineRenderTarget { uint32_t guest_render_target; DXGI_FORMAT format; @@ -537,7 +536,7 @@ class RenderTargetCache { // 16: - EDRAM pitch in tiles. uint32_t base_samples_2x_depth_pitch; }; - // EDRAM pipeline states for the RTV/DSV path. + // EDRAM pipelines for the RTV/DSV path. static const EdramLoadStoreModeInfo edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)]; ID3D12PipelineState* @@ -546,20 +545,20 @@ class RenderTargetCache { ID3D12PipelineState* edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {}; - // Resolve root signatures and pipeline state objects. + // Resolve root signatures and pipelines. 
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr; static const std::pair resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)]; - ID3D12PipelineState* resolve_copy_pipeline_states_[size_t( + ID3D12PipelineState* resolve_copy_pipelines_[size_t( draw_util::ResolveCopyShaderIndex::kCount)] = {}; ID3D12RootSignature* resolve_clear_root_signature_ = nullptr; // Clearing 32bpp color, depth with ROV, or unorm depth without ROV. - ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr; // Clearing 64bpp color. - ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr; // Clearing float depth without ROV, both the float24 and the host float32 // versions. - ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr; + ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr; // FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on // Nvidia Maxwell 1st generation and older. diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index c8b1e6297..44d76c9ed 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) { return false; } - // Create the loading pipeline state objects. + // Create the loading pipelines. 
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { const LoadModeInfo& mode_info = load_mode_info_[i]; - load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState( + load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.shader, mode_info.shader_size, load_root_signature_); - if (load_pipeline_states_[i] == nullptr) { - XELOGE( - "Failed to create the texture loading pipeline state object for mode " - "{}", - i); + if (load_pipelines_[i] == nullptr) { + XELOGE("Failed to create the texture loading pipeline for mode {}", i); Shutdown(); return false; } if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) { - load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState( + load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline( device, mode_info.shader_2x, mode_info.shader_2x_size, load_root_signature_); - if (load_pipeline_states_2x_[i] == nullptr) { + if (load_pipelines_2x_[i] == nullptr) { XELOGE( - "Failed to create the 2x-scaled texture loading pipeline state " - "for mode {}", + "Failed to create the 2x-scaled texture loading pipeline for mode " + "{}", i); Shutdown(); return false; @@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() { ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_); for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { - ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]); - ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]); + ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]); + ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]); } ui::d3d12::util::ReleaseAndNull(load_root_signature_); @@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { if (IsResolutionScale2X() && key.tiled) { LoadMode load_mode = GetLoadMode(key); if (load_mode != LoadMode::kUnknown && - load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) { + load_pipelines_2x_[uint32_t(load_mode)] != nullptr) { uint32_t 
base_size = 0, mip_size = 0; texture_util::GetTextureTotalSize( key.dimension, key.width, key.height, key.depth, key.format, @@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { } texture->base_watch_handle = nullptr; texture->mip_watch_handle = nullptr; - textures_.insert(std::make_pair(map_key, texture)); + textures_.emplace(map_key, texture); COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); textures_total_size_ += texture->resource_size; COUNT_profile_set("gpu/texture_cache/total_size_mb", @@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) { return false; } bool scaled_resolve = texture->key.scaled_resolve ? true : false; - ID3D12PipelineState* pipeline_state = - scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)] - : load_pipeline_states_[uint32_t(load_mode)]; - if (pipeline_state == nullptr) { + ID3D12PipelineState* pipeline = scaled_resolve + ? load_pipelines_2x_[uint32_t(load_mode)] + : load_pipelines_[uint32_t(load_mode)]; + if (pipeline == nullptr) { return false; } const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)]; @@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { load_mode_info.srv_bpe_log2); } } - command_processor_.SetComputePipelineState(pipeline_state); + command_processor_.SetComputePipeline(pipeline); command_list.D3DSetComputeRootSignature(load_root_signature_); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); @@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture, } device->CreateShaderResourceView( texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); - texture.srv_descriptors.insert({descriptor_key, descriptor_index}); + texture.srv_descriptors.emplace(descriptor_key, descriptor_index); return descriptor_index; } diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 
1345d8faf..0e66328f0 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -550,9 +550,9 @@ class TextureCache { static const LoadModeInfo load_mode_info_[]; ID3D12RootSignature* load_root_signature_ = nullptr; - ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {}; - // Load pipeline state objects for 2x-scaled resolved targets. - ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {}; + ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {}; + // Load pipelines for 2x-scaled resolved targets. + ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {}; std::unordered_multimap textures_; uint64_t textures_total_size_ = 0; diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 92be28630..76eed4d10 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), DxbcSrc::LU(~uint32_t(3))); } - // Add the word offset from the instruction, plus the offset of the first - // needed word within the element. + // Add the word offset from the instruction (signed), plus the offset of the + // first needed word within the element. uint32_t first_word_index; xe::bit_scan_forward(needed_words, &first_word_index); int32_t first_word_buffer_offset = diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index d253bdad0..23998c307 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -65,17 +65,17 @@ enum class InstructionStorageTarget { // disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both // skipped components and zeros, which cannot be encoded, and therefore it will // not). 
-constexpr uint32_t GetInstructionStorageTargetUsedComponents( +constexpr uint32_t GetInstructionStorageTargetUsedComponentCount( InstructionStorageTarget target) { switch (target) { case InstructionStorageTarget::kNone: - return 0b0000; + return 0; case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: - return 0b0111; + return 3; case InstructionStorageTarget::kDepth: - return 0b0001; + return 1; default: - return 0b1111; + return 4; } } @@ -136,8 +136,9 @@ struct InstructionResult { // Returns the write mask containing only components actually present in the // target. uint32_t GetUsedWriteMask() const { - return original_write_mask & - GetInstructionStorageTargetUsedComponents(storage_target); + uint32_t target_component_count = + GetInstructionStorageTargetUsedComponentCount(storage_target); + return original_write_mask & ((1 << target_component_count) - 1); } // True if the components are in their 'standard' swizzle arrangement (xyzw). bool IsStandardSwizzle() const { @@ -161,6 +162,28 @@ struct InstructionResult { } return used_components; } + // Returns which components of the used write mask are constant, and what + // values they have. 
+ uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const { + uint32_t constant_components = 0; + uint32_t constant_values = 0; + uint32_t used_write_mask = GetUsedWriteMask(); + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_write_mask & (1 << i))) { + continue; + } + SwizzleSource component = components[i]; + if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) { + continue; + } + constant_components |= 1 << i; + if (component == SwizzleSource::k1) { + constant_values |= 1 << i; + } + } + constant_values_out = constant_values; + return constant_components; + } }; enum class InstructionStorageSource { diff --git a/src/xenia/gpu/shared_memory.h b/src/xenia/gpu/shared_memory.h index 496836a38..98719b670 100644 --- a/src/xenia/gpu/shared_memory.h +++ b/src/xenia/gpu/shared_memory.h @@ -25,6 +25,9 @@ namespace gpu { // system page size granularity. class SharedMemory { public: + static constexpr uint32_t kBufferSizeLog2 = 29; + static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; + virtual ~SharedMemory(); // Call in the implementation-specific ClearCache. virtual void ClearCache(); @@ -98,9 +101,6 @@ class SharedMemory { // destructor. void ShutdownCommon(); - static constexpr uint32_t kBufferSizeLog2 = 29; - static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; - // Sparse allocations are 4 MB, so not too many of them are allocated, but // also not to waste too much memory for padding (with 16 MB there's too // much). diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index b9e23dc93..5c0a104e5 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() { return false; } - // Create the pipeline states. 
- D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {}; - pipeline_state_desc.pRootSignature = root_signature_; - pipeline_state_desc.VS.pShaderBytecode = immediate_vs; - pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs); - pipeline_state_desc.PS.pShaderBytecode = immediate_ps; - pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps); + // Create the pipelines. + D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {}; + pipeline_desc.pRootSignature = root_signature_; + pipeline_desc.VS.pShaderBytecode = immediate_vs; + pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs); + pipeline_desc.PS.pShaderBytecode = immediate_ps; + pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps); D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc = - pipeline_state_desc.BlendState.RenderTarget[0]; + pipeline_desc.BlendState.RenderTarget[0]; pipeline_blend_desc.BlendEnable = TRUE; pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; @@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | D3D12_COLOR_WRITE_ENABLE_GREEN | D3D12_COLOR_WRITE_ENABLE_BLUE; - pipeline_state_desc.SampleMask = UINT_MAX; - pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; - pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; - pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE; - pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE; + pipeline_desc.SampleMask = UINT_MAX; + pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE; + pipeline_desc.RasterizerState.DepthClipEnable = TRUE; D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {}; pipeline_input_elements[0].SemanticName = "POSITION"; pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT; 
@@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_input_elements[2].AlignedByteOffset = offsetof(ImmediateVertex, color); - pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements; - pipeline_state_desc.InputLayout.NumElements = + pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements; + pipeline_desc.InputLayout.NumElements = UINT(xe::countof(pipeline_input_elements)); - pipeline_state_desc.PrimitiveTopologyType = - D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - pipeline_state_desc.NumRenderTargets = 1; - pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; - pipeline_state_desc.SampleDesc.Count = 1; + pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pipeline_desc.NumRenderTargets = 1; + pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; + pipeline_desc.SampleDesc.Count = 1; if (FAILED(device->CreateGraphicsPipelineState( - &pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) { + &pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) { XELOGE( "Failed to create the Direct3D 12 immediate drawer triangle pipeline " "state"); Shutdown(); return false; } - pipeline_state_desc.PrimitiveTopologyType = - D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; if (FAILED(device->CreateGraphicsPipelineState( - &pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) { + &pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) { XELOGE( "Failed to create the Direct3D 12 immediate drawer line pipeline " "state"); @@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() { util::ReleaseAndNull(sampler_heap_); - util::ReleaseAndNull(pipeline_state_line_); - util::ReleaseAndNull(pipeline_state_triangle_); + util::ReleaseAndNull(pipeline_line_); + util::ReleaseAndNull(pipeline_triangle_); util::ReleaseAndNull(root_signature_); } @@ -611,17 
+609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { uint32_t(sampler_index))); } - // Set the primitive type and the pipeline state for it. + // Set the primitive type and the pipeline for it. D3D_PRIMITIVE_TOPOLOGY primitive_topology; - ID3D12PipelineState* pipeline_state; + ID3D12PipelineState* pipeline; switch (draw.primitive_type) { case ImmediatePrimitiveType::kLines: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; - pipeline_state = pipeline_state_line_; + pipeline = pipeline_line_; break; case ImmediatePrimitiveType::kTriangles: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - pipeline_state = pipeline_state_triangle_; + pipeline = pipeline_triangle_; break; default: assert_unhandled_case(draw.primitive_type); @@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { if (current_primitive_topology_ != primitive_topology) { current_primitive_topology_ = primitive_topology; current_command_list_->IASetPrimitiveTopology(primitive_topology); - current_command_list_->SetPipelineState(pipeline_state); + current_command_list_->SetPipelineState(pipeline); } // Draw. 
diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h index 4300af76e..fbc362f59 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h @@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { kCount }; - ID3D12PipelineState* pipeline_state_triangle_ = nullptr; - ID3D12PipelineState* pipeline_state_line_ = nullptr; + ID3D12PipelineState* pipeline_triangle_ = nullptr; + ID3D12PipelineState* pipeline_line_ = nullptr; ID3D12DescriptorHeap* sampler_heap_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; diff --git a/src/xenia/ui/d3d12/d3d12_util.cc b/src/xenia/ui/d3d12/d3d12_util.cc index 710d3b6db..caea2b296 100644 --- a/src/xenia/ui/d3d12/d3d12_util.cc +++ b/src/xenia/ui/d3d12/d3d12_util.cc @@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature( return root_signature; } -ID3D12PipelineState* CreateComputePipelineState( +ID3D12PipelineState* CreateComputePipeline( ID3D12Device* device, const void* shader, size_t shader_size, ID3D12RootSignature* root_signature) { D3D12_COMPUTE_PIPELINE_STATE_DESC desc; diff --git a/src/xenia/ui/d3d12/d3d12_util.h b/src/xenia/ui/d3d12/d3d12_util.h index 5bce23568..062177218 100644 --- a/src/xenia/ui/d3d12/d3d12_util.h +++ b/src/xenia/ui/d3d12/d3d12_util.h @@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) { ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider, const D3D12_ROOT_SIGNATURE_DESC& desc); -ID3D12PipelineState* CreateComputePipelineState( - ID3D12Device* device, const void* shader, size_t shader_size, - ID3D12RootSignature* root_signature); +ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device, + const void* shader, + size_t shader_size, + ID3D12RootSignature* root_signature); constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) { switch (element_size_bytes_log2) { diff --git a/src/xenia/ui/graphics_upload_buffer_pool.cc 
b/src/xenia/ui/graphics_upload_buffer_pool.cc index 2a780b0c9..5eb04fba3 100644 --- a/src/xenia/ui/graphics_upload_buffer_pool.cc +++ b/src/xenia/ui/graphics_upload_buffer_pool.cc @@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() { GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); assert_true(size <= page_size_); @@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out, size_t& size_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); size = std::min(size, page_size_); From fe9b5b4a8f22fb83dccadaf536ca2647ac3a9a9e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 17:02:09 +0300 Subject: [PATCH 02/12] [D3D12] Cleanup: remove inline --- src/xenia/gpu/d3d12/d3d12_command_processor.h | 4 +- src/xenia/gpu/d3d12/d3d12_shared_memory.h | 8 +- src/xenia/gpu/d3d12/deferred_command_list.h | 102 +++++++++--------- src/xenia/gpu/d3d12/pipeline_cache.h | 2 +- src/xenia/gpu/d3d12/render_target_cache.h | 3 +- src/xenia/gpu/d3d12/texture_cache.h | 30 +++--- src/xenia/ui/d3d12/d3d12_provider.h | 30 +++--- src/xenia/ui/d3d12/d3d12_util.h | 2 +- 8 files changed, 86 insertions(+), 95 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 42c1e0092..ceffe5fd0 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -188,7 +188,7 @@ class D3D12CommandProcessor : public CommandProcessor { // Returns a pipeline with 
deferred creation by its handle. May return nullptr // if failed to create the pipeline. - inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { return pipeline_cache_->GetD3D12PipelineByHandle(handle); } @@ -501,7 +501,7 @@ class D3D12CommandProcessor : public CommandProcessor { static constexpr uint32_t kSwapTextureWidth = 1280; static constexpr uint32_t kSwapTextureHeight = 720; - inline std::pair GetSwapTextureSize() const { + std::pair GetSwapTextureSize() const { if (texture_cache_->IsResolutionScale2X()) { return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2); } diff --git a/src/xenia/gpu/d3d12/d3d12_shared_memory.h b/src/xenia/gpu/d3d12/d3d12_shared_memory.h index 6620cecaa..dc918bb11 100644 --- a/src/xenia/gpu/d3d12/d3d12_shared_memory.h +++ b/src/xenia/gpu/d3d12/d3d12_shared_memory.h @@ -48,7 +48,7 @@ class D3D12SharedMemory : public SharedMemory { // UseForReading or UseForWriting. // Makes the buffer usable for vertices, indices and texture untiling. - inline void UseForReading() { + void UseForReading() { // Vertex fetch is also allowed in pixel shaders. CommitUAVWritesAndTransitionBuffer( D3D12_RESOURCE_STATE_INDEX_BUFFER | @@ -56,18 +56,18 @@ class D3D12SharedMemory : public SharedMemory { D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } // Makes the buffer usable for texture tiling after a resolve. - inline void UseForWriting() { + void UseForWriting() { CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } // Makes the buffer usable as a source for copy commands. 
- inline void UseAsCopySource() { + void UseAsCopySource() { CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE); } // Must be called when doing draws/dispatches modifying data within the shared // memory buffer as a UAV, to make sure that when UseForWriting is called the // next time, a UAV barrier will be done, and subsequent overlapping UAV // writes and reads are ordered. - inline void MarkUAVWritesCommitNeeded() { + void MarkUAVWritesCommitNeeded() { if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { buffer_uav_writes_commit_needed_ = true; } diff --git a/src/xenia/gpu/d3d12/deferred_command_list.h b/src/xenia/gpu/d3d12/deferred_command_list.h index 9393798c3..e8060371c 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.h +++ b/src/xenia/gpu/d3d12/deferred_command_list.h @@ -33,7 +33,7 @@ class DeferredCommandList { void Execute(ID3D12GraphicsCommandList* command_list, ID3D12GraphicsCommandList1* command_list_1); - inline void D3DClearUnorderedAccessViewUint( + void D3DClearUnorderedAccessViewUint( D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap, D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource, const UINT values[4], UINT num_rects, const D3D12_RECT* rects) { @@ -51,9 +51,9 @@ class DeferredCommandList { } } - inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset, - ID3D12Resource* src_buffer, UINT64 src_offset, - UINT64 num_bytes) { + void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset, + ID3D12Resource* src_buffer, UINT64 src_offset, + UINT64 num_bytes) { auto& args = *reinterpret_cast(WriteCommand( Command::kD3DCopyBufferRegion, sizeof(D3DCopyBufferRegionArguments))); args.dst_buffer = dst_buffer; @@ -63,26 +63,26 @@ class DeferredCommandList { args.num_bytes = num_bytes; } - inline void D3DCopyResource(ID3D12Resource* dst_resource, - ID3D12Resource* src_resource) { + void D3DCopyResource(ID3D12Resource* dst_resource, + ID3D12Resource* src_resource) { 
auto& args = *reinterpret_cast(WriteCommand( Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments))); args.dst_resource = dst_resource; args.src_resource = src_resource; } - inline void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst, - const D3D12_TEXTURE_COPY_LOCATION& src) { + void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst, + const D3D12_TEXTURE_COPY_LOCATION& src) { auto& args = *reinterpret_cast( WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments))); std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION)); std::memcpy(&args.src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION)); } - inline void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, - UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION& src, - const D3D12_BOX& src_box) { + void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, UINT dst_x, + UINT dst_y, UINT dst_z, + const D3D12_TEXTURE_COPY_LOCATION& src, + const D3D12_BOX& src_box) { auto& args = *reinterpret_cast(WriteCommand( Command::kCopyTextureRegion, sizeof(CopyTextureRegionArguments))); std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION)); @@ -93,8 +93,8 @@ class DeferredCommandList { args.src_box = src_box; } - inline void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y, - UINT thread_group_count_z) { + void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y, + UINT thread_group_count_z) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments))); args.thread_group_count_x = thread_group_count_x; @@ -102,11 +102,10 @@ class DeferredCommandList { args.thread_group_count_z = thread_group_count_z; } - inline void D3DDrawIndexedInstanced(UINT index_count_per_instance, - UINT instance_count, - UINT start_index_location, - INT base_vertex_location, - UINT start_instance_location) { + void D3DDrawIndexedInstanced(UINT index_count_per_instance, + UINT instance_count, UINT start_index_location, + 
INT base_vertex_location, + UINT start_instance_location) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DDrawIndexedInstanced, sizeof(D3DDrawIndexedInstancedArguments))); @@ -117,9 +116,9 @@ class DeferredCommandList { args.start_instance_location = start_instance_location; } - inline void D3DDrawInstanced(UINT vertex_count_per_instance, - UINT instance_count, UINT start_vertex_location, - UINT start_instance_location) { + void D3DDrawInstanced(UINT vertex_count_per_instance, UINT instance_count, + UINT start_vertex_location, + UINT start_instance_location) { auto& args = *reinterpret_cast(WriteCommand( Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments))); args.vertex_count_per_instance = vertex_count_per_instance; @@ -128,7 +127,7 @@ class DeferredCommandList { args.start_instance_location = start_instance_location; } - inline void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) { + void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) { auto& args = *reinterpret_cast(WriteCommand( Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW))); if (view != nullptr) { @@ -142,14 +141,13 @@ class DeferredCommandList { } } - inline void D3DIASetPrimitiveTopology( - D3D12_PRIMITIVE_TOPOLOGY primitive_topology) { + void D3DIASetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY primitive_topology) { auto& arg = *reinterpret_cast(WriteCommand( Command::kD3DIASetPrimitiveTopology, sizeof(D3D12_PRIMITIVE_TOPOLOGY))); arg = primitive_topology; } - inline void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) { + void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) { auto args = reinterpret_cast( WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT))); args[0] = blend_factor[0]; @@ -158,7 +156,7 @@ class DeferredCommandList { args[3] = blend_factor[3]; } - inline void D3DOMSetRenderTargets( + void D3DOMSetRenderTargets( UINT num_render_target_descriptors, const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors, BOOL 
rts_single_handle_to_descriptor_range, @@ -185,14 +183,14 @@ class DeferredCommandList { } } - inline void D3DOMSetStencilRef(UINT stencil_ref) { + void D3DOMSetStencilRef(UINT stencil_ref) { auto& arg = *reinterpret_cast( WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT))); arg = stencil_ref; } - inline void D3DResourceBarrier(UINT num_barriers, - const D3D12_RESOURCE_BARRIER* barriers) { + void D3DResourceBarrier(UINT num_barriers, + const D3D12_RESOURCE_BARRIER* barriers) { if (num_barriers == 0) { return; } @@ -207,21 +205,22 @@ class DeferredCommandList { num_barriers * sizeof(D3D12_RESOURCE_BARRIER)); } - inline void RSSetScissorRect(const D3D12_RECT& rect) { + void RSSetScissorRect(const D3D12_RECT& rect) { auto& arg = *reinterpret_cast( WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT))); arg = rect; } - inline void RSSetViewport(const D3D12_VIEWPORT& viewport) { + void RSSetViewport(const D3D12_VIEWPORT& viewport) { auto& arg = *reinterpret_cast( WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT))); arg = viewport; } - inline void D3DSetComputeRoot32BitConstants( - UINT root_parameter_index, UINT num_32bit_values_to_set, - const void* src_data, UINT dest_offset_in_32bit_values) { + void D3DSetComputeRoot32BitConstants(UINT root_parameter_index, + UINT num_32bit_values_to_set, + const void* src_data, + UINT dest_offset_in_32bit_values) { if (num_32bit_values_to_set == 0) { return; } @@ -235,9 +234,10 @@ class DeferredCommandList { std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t)); } - inline void D3DSetGraphicsRoot32BitConstants( - UINT root_parameter_index, UINT num_32bit_values_to_set, - const void* src_data, UINT dest_offset_in_32bit_values) { + void D3DSetGraphicsRoot32BitConstants(UINT root_parameter_index, + UINT num_32bit_values_to_set, + const void* src_data, + UINT dest_offset_in_32bit_values) { if (num_32bit_values_to_set == 0) { return; } @@ -251,7 +251,7 @@ class DeferredCommandList { 
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t)); } - inline void D3DSetComputeRootConstantBufferView( + void D3DSetComputeRootConstantBufferView( UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DSetComputeRootConstantBufferView, @@ -260,7 +260,7 @@ class DeferredCommandList { args.buffer_location = buffer_location; } - inline void D3DSetGraphicsRootConstantBufferView( + void D3DSetGraphicsRootConstantBufferView( UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView, @@ -269,7 +269,7 @@ class DeferredCommandList { args.buffer_location = buffer_location; } - inline void D3DSetComputeRootDescriptorTable( + void D3DSetComputeRootDescriptorTable( UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DSetComputeRootDescriptorTable, @@ -278,7 +278,7 @@ class DeferredCommandList { args.base_descriptor.ptr = base_descriptor.ptr; } - inline void D3DSetGraphicsRootDescriptorTable( + void D3DSetGraphicsRootDescriptorTable( UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable, @@ -287,42 +287,40 @@ class DeferredCommandList { args.base_descriptor.ptr = base_descriptor.ptr; } - inline void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) { + void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) { auto& arg = *reinterpret_cast(WriteCommand( Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*))); arg = root_signature; } - inline void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) { + void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) { auto& arg = *reinterpret_cast(WriteCommand( 
Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*))); arg = root_signature; } - inline void SetDescriptorHeaps( - ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap, - ID3D12DescriptorHeap* sampler_descriptor_heap) { + void SetDescriptorHeaps(ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap, + ID3D12DescriptorHeap* sampler_descriptor_heap) { auto& args = *reinterpret_cast(WriteCommand( Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments))); args.cbv_srv_uav_descriptor_heap = cbv_srv_uav_descriptor_heap; args.sampler_descriptor_heap = sampler_descriptor_heap; } - inline void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) { + void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) { auto& arg = *reinterpret_cast(WriteCommand( Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*))); arg = pipeline_state; } - inline void SetPipelineStateHandle(void* pipeline_state_handle) { + void SetPipelineStateHandle(void* pipeline_state_handle) { auto& arg = *reinterpret_cast( WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*))); arg = pipeline_state_handle; } - inline void D3DSetSamplePositions( - UINT num_samples_per_pixel, UINT num_pixels, - const D3D12_SAMPLE_POSITION* sample_positions) { + void D3DSetSamplePositions(UINT num_samples_per_pixel, UINT num_pixels, + const D3D12_SAMPLE_POSITION* sample_positions) { auto& args = *reinterpret_cast( WriteCommand(Command::kD3DSetSamplePositions, sizeof(D3DSetSamplePositionsArguments))); diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index ee7f0a7de..8159416d0 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -78,7 +78,7 @@ class PipelineCache { // Returns a pipeline with deferred creation by its handle. May return nullptr // if failed to create the pipeline. 
- inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { + ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const { return reinterpret_cast(handle)->state; } diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index bc68c68a9..6d20e8d52 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -303,8 +303,7 @@ class RenderTargetCache { // performance difference, but with EDRAM loads/stores less conversion should // be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and // it's probably more accurate. - static inline DXGI_FORMAT GetDepthDXGIFormat( - xenos::DepthRenderTargetFormat format) { + static DXGI_FORMAT GetDepthDXGIFormat(xenos::DepthRenderTargetFormat format) { return format == xenos::DepthRenderTargetFormat::kD24FS8 ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_D24_UNORM_S8_UINT; diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 0e66328f0..85131f25d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -106,18 +106,18 @@ class TextureCache { bool operator!=(const TextureKey& key) const { return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key; } - inline uint64_t GetMapKey() const { + uint64_t GetMapKey() const { return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32); } - inline void SetMapKey(uint64_t key) { + void SetMapKey(uint64_t key) { map_key[0] = uint32_t(key); map_key[1] = uint32_t(key >> 32); } - inline bool IsInvalid() const { + bool IsInvalid() const { // Zero base and zero width is enough for a binding to be invalid. return map_key[0] == 0; } - inline void MakeInvalid() { + void MakeInvalid() { // Reset all for a stable hash. 
SetMapKey(0); bucket_key = 0; @@ -222,9 +222,7 @@ class TextureCache { void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled); - inline bool IsResolutionScale2X() const { - return scaled_resolve_buffer_ != nullptr; - } + bool IsResolutionScale2X() const { return scaled_resolve_buffer_ != nullptr; } ID3D12Resource* GetScaledResolveBuffer() const { return scaled_resolve_buffer_; } @@ -233,7 +231,7 @@ class TextureCache { uint32_t length_unscaled); void UseScaledResolveBufferForReading(); void UseScaledResolveBufferForWriting(); - inline void MarkScaledResolveBufferUAVWritesCommitNeeded() { + void MarkScaledResolveBufferUAVWritesCommitNeeded() { if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { scaled_resolve_buffer_uav_writes_commit_needed_ = true; } @@ -432,7 +430,7 @@ class TextureCache { // Whether the signed version of the texture has a different representation on // the host than its unsigned version (for example, if it's a fixed-point // texture emulated with a larger host pixel format). - static inline bool IsSignedVersionSeparate(xenos::TextureFormat format) { + static bool IsSignedVersionSeparate(xenos::TextureFormat format) { const HostFormat& host_format = host_formats_[uint32_t(format)]; return host_format.load_mode_snorm != LoadMode::kUnknown && host_format.load_mode_snorm != host_format.load_mode; @@ -441,26 +439,24 @@ class TextureCache { // of block-compressed textures with 4x4-aligned dimensions on PC). static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width, uint32_t height); - static inline DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format, - uint32_t width, - uint32_t height) { + static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format, + uint32_t width, uint32_t height) { const HostFormat& host_format = host_formats_[uint32_t(format)]; return IsDecompressionNeeded(format, width, height) ? 
host_format.dxgi_format_uncompressed : host_format.dxgi_format_resource; } - static inline DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) { + static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) { return GetDXGIResourceFormat(key.format, key.width, key.height); } - static inline DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format, - uint32_t width, - uint32_t height) { + static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format, + uint32_t width, uint32_t height) { const HostFormat& host_format = host_formats_[uint32_t(format)]; return IsDecompressionNeeded(format, width, height) ? host_format.dxgi_format_uncompressed : host_format.dxgi_format_unorm; } - static inline DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) { + static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) { return GetDXGIUnormFormat(key.format, key.width, key.height); } diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 0e70def17..255d42a3d 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -46,22 +46,22 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; } uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; } template - inline T OffsetViewDescriptor(T start, uint32_t index) const { + T OffsetViewDescriptor(T start, uint32_t index) const { start.ptr += index * descriptor_size_view_; return start; } template - inline T OffsetSamplerDescriptor(T start, uint32_t index) const { + T OffsetSamplerDescriptor(T start, uint32_t index) const { start.ptr += index * descriptor_size_sampler_; return start; } template - inline T OffsetRTVDescriptor(T start, uint32_t index) const { + T OffsetRTVDescriptor(T start, uint32_t index) const { start.ptr += index * descriptor_size_rtv_; return start; } template - inline T OffsetDSVDescriptor(T start, uint32_t index) const { + T OffsetDSVDescriptor(T start, uint32_t index) const { 
start.ptr += index * descriptor_size_dsv_; return start; } @@ -91,32 +91,30 @@ class D3D12Provider : public GraphicsProvider { } // Proxies for Direct3D 12 functions since they are loaded dynamically. - inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, - D3D_ROOT_SIGNATURE_VERSION version, - ID3DBlob** blob_out, - ID3DBlob** error_blob_out) const { + HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, + D3D_ROOT_SIGNATURE_VERSION version, + ID3DBlob** blob_out, + ID3DBlob** error_blob_out) const { return pfn_d3d12_serialize_root_signature_(desc, version, blob_out, error_blob_out); } - inline HRESULT Disassemble(const void* src_data, size_t src_data_size, - UINT flags, const char* comments, - ID3DBlob** disassembly_out) const { + HRESULT Disassemble(const void* src_data, size_t src_data_size, UINT flags, + const char* comments, ID3DBlob** disassembly_out) const { if (!pfn_d3d_disassemble_) { return E_NOINTERFACE; } return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments, disassembly_out); } - inline HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, - const IID& riid, - void** ppv) const { + HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, const IID& riid, + void** ppv) const { if (!pfn_dxilconv_dxc_create_instance_) { return E_NOINTERFACE; } return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv); } - inline HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid, - void** ppv) const { + HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid, + void** ppv) const { if (!pfn_dxcompiler_dxc_create_instance_) { return E_NOINTERFACE; } diff --git a/src/xenia/ui/d3d12/d3d12_util.h b/src/xenia/ui/d3d12/d3d12_util.h index 062177218..6798f4f1c 100644 --- a/src/xenia/ui/d3d12/d3d12_util.h +++ b/src/xenia/ui/d3d12/d3d12_util.h @@ -27,7 +27,7 @@ extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload; extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback; template -inline bool 
ReleaseAndNull(T& object) { +bool ReleaseAndNull(T& object) { if (object != nullptr) { object->Release(); object = nullptr; From 2dc6b0b2adacdab88a4755f8baff62176b8ba3a7 Mon Sep 17 00:00:00 2001 From: Gliniak Date: Sun, 27 Sep 2020 18:05:28 +0200 Subject: [PATCH 03/12] [Kernel/Thread] Added missing paramteter to KeSetAffinityThread --- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 20 ++++++++++++----- src/xenia/kernel/xthread.cc | 22 +++++++++++++------ src/xenia/kernel/xthread.h | 5 ++--- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 42292895b..29b064841 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -222,13 +222,23 @@ void KeSetCurrentStackPointers(lpvoid_t stack_ptr, } DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented); -dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity) { - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); - if (thread) { - thread->SetAffinity(affinity); +dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity, + lpdword_t previous_affinity_ptr) { + // Xbox 360 uses additional parameter (in comparation to NT equivalent) + // which is used only for returning previous thread affinity. 
(Based on code + // dissasembly) + if (!affinity) { + return X_STATUS_INVALID_PARAMETER; } - return (uint32_t)affinity; + auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); + if (thread) { + if (previous_affinity_ptr) { + *previous_affinity_ptr = 1 << thread->active_cpu(); + } + thread->SetAffinity(affinity); + } + return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented); diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 458d7a592..1e4753053 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -205,6 +205,7 @@ void XThread::InitializeGuestObject() { // 0xA88 = APC // 0x18 = timer xe::store_and_swap(p + 0x09C, 0xFDFFD7FF); + xe::store_and_swap(p + 0xBF, 0); xe::store_and_swap(p + 0x0D0, stack_base_); xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime()); xe::store_and_swap(p + 0x144, guest_object() + 0x144); @@ -346,6 +347,9 @@ X_STATUS XThread::Create() { // Exports use this to get the kernel. thread_state_->context()->kernel_state = kernel_state_; + // Initialize the KTHREAD object. + InitializeGuestObject(); + X_KPCR* pcr = memory()->TranslateVirtual(pcr_address_); pcr->tls_ptr = tls_static_address_; @@ -355,14 +359,12 @@ X_STATUS XThread::Create() { pcr->stack_base_ptr = stack_base_; pcr->stack_end_ptr = stack_limit_; + pcr->dpc_active = 0; // DPC active bool? + uint8_t proc_mask = static_cast(creation_params_.creation_flags >> 24); - - pcr->current_cpu = GetFakeCpuNumber(proc_mask); // Current CPU(?) - pcr->dpc_active = 0; // DPC active bool? - - // Initialize the KTHREAD object. - InitializeGuestObject(); + // Assign cpu core used by thread on guest side + SetAffinity(1 << GetFakeCpuNumber(proc_mask)); // Always retain when starting - the thread owns itself until exited. 
RetainHandle(); @@ -714,7 +716,7 @@ void XThread::SetAffinity(uint32_t affinity) { XELOGW("Too few processors - scheduling will be wonky"); } SetActiveCpu(GetFakeCpuNumber(affinity)); - affinity_ = affinity; + if (!cvars::ignore_thread_affinities) { thread_->set_affinity_mask(affinity); } @@ -729,6 +731,12 @@ void XThread::SetActiveCpu(uint32_t cpu_index) { assert_true(cpu_index < 6); uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); xe::store_and_swap(pcr + 0x10C, cpu_index); + + if (is_guest_thread()) { + X_KTHREAD* thread_object = + memory()->TranslateVirtual(guest_object()); + thread_object->current_cpu = cpu_index; + } } bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) { diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 2b6518703..de813bb49 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -88,7 +88,8 @@ struct X_KTHREAD { char unk_10[0xAC]; // 0x10 uint8_t suspend_count; // 0xBC uint8_t unk_BD; // 0xBD - uint16_t unk_BE; // 0xBE + uint8_t unk_BE; // 0xBE + uint8_t current_cpu; // 0xBF char unk_C0[0x70]; // 0xC0 xe::be create_time; // 0x130 xe::be exit_time; // 0x138 @@ -165,7 +166,6 @@ class XThread : public XObject, public cpu::Thread { int32_t priority() const { return priority_; } int32_t QueryPriority(); void SetPriority(int32_t increment); - uint32_t affinity() const { return affinity_; } void SetAffinity(uint32_t affinity); uint32_t active_cpu() const; void SetActiveCpu(uint32_t cpu_index); @@ -220,7 +220,6 @@ class XThread : public XObject, public cpu::Thread { bool running_ = false; int32_t priority_ = 0; - uint32_t affinity_ = 0; xe::global_critical_region global_critical_region_; std::atomic irql_ = {0}; From a3196171853c7f3d61011da0f42309bef32fb836 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 18:09:47 +0300 Subject: [PATCH 04/12] [Kernel] Thread affinity cleanup --- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 10 +-- src/xenia/kernel/xthread.cc | 79 
+++++++++---------- src/xenia/kernel/xthread.h | 12 ++- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 29b064841..1f0cd2cc2 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -224,17 +224,17 @@ DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented); dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity, lpdword_t previous_affinity_ptr) { - // Xbox 360 uses additional parameter (in comparation to NT equivalent) - // which is used only for returning previous thread affinity. (Based on code - // dissasembly) + // The Xbox 360, according to disassembly of KeSetAffinityThread, unlike + // Windows NT, stores the previous affinity via the pointer provided as an + // argument, not in the return value - the return value is used for the + // result. if (!affinity) { return X_STATUS_INVALID_PARAMETER; } - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); if (thread) { if (previous_affinity_ptr) { - *previous_affinity_ptr = 1 << thread->active_cpu(); + *previous_affinity_ptr = uint32_t(1) << thread->active_cpu(); } thread->SetAffinity(affinity); } diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 1e4753053..1e723ff65 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -156,11 +156,17 @@ void XThread::set_name(const std::string_view name) { } } -uint8_t next_cpu = 0; -uint8_t GetFakeCpuNumber(uint8_t proc_mask) { +static uint8_t next_cpu = 0; +static uint8_t GetFakeCpuNumber(uint8_t proc_mask) { + // NOTE: proc_mask is logical processors, not physical processors or cores. if (!proc_mask) { next_cpu = (next_cpu + 1) % 6; return next_cpu; // is this reasonable? + // TODO(Triang3l): Does the following apply here? 
+ // https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores + // "On Xbox 360, you must explicitly assign software threads to a particular + // hardware thread by using XSetThreadProcessor. Otherwise, all child + // threads will stay on the same hardware thread as the parent." } assert_false(proc_mask & 0xC0); @@ -205,7 +211,7 @@ void XThread::InitializeGuestObject() { // 0xA88 = APC // 0x18 = timer xe::store_and_swap(p + 0x09C, 0xFDFFD7FF); - xe::store_and_swap(p + 0xBF, 0); + // current_cpu is expected to be initialized externally via SetActiveCpu. xe::store_and_swap(p + 0x0D0, stack_base_); xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime()); xe::store_and_swap(p + 0x144, guest_object() + 0x144); @@ -347,6 +353,9 @@ X_STATUS XThread::Create() { // Exports use this to get the kernel. thread_state_->context()->kernel_state = kernel_state_; + uint8_t cpu_index = GetFakeCpuNumber( + static_cast(creation_params_.creation_flags >> 24)); + // Initialize the KTHREAD object. InitializeGuestObject(); @@ -361,10 +370,9 @@ X_STATUS XThread::Create() { pcr->dpc_active = 0; // DPC active bool? - uint8_t proc_mask = - static_cast(creation_params_.creation_flags >> 24); - // Assign cpu core used by thread on guest side - SetAffinity(1 << GetFakeCpuNumber(proc_mask)); + // Assign the thread to the logical processor, and also set up the current CPU + // in KPCR and KTHREAD. + SetActiveCpu(cpu_index); // Always retain when starting - the thread owns itself until exited. RetainHandle(); @@ -417,10 +425,6 @@ X_STATUS XThread::Create() { return X_STATUS_NO_MEMORY; } - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(proc_mask); - } - // Set the thread name based on host ID (for easier debugging). 
if (thread_name_.empty()) { set_name(fmt::format("XThread{:04X}", thread_->system_id())); @@ -702,40 +706,33 @@ void XThread::SetPriority(int32_t increment) { } void XThread::SetAffinity(uint32_t affinity) { - // Affinity mask, as in SetThreadAffinityMask. - // Xbox thread IDs: - // 0 - core 0, thread 0 - user - // 1 - core 0, thread 1 - user - // 2 - core 1, thread 0 - sometimes xcontent - // 3 - core 1, thread 1 - user - // 4 - core 2, thread 0 - xaudio - // 5 - core 2, thread 1 - user - // TODO(benvanik): implement better thread distribution. - // NOTE: these are logical processors, not physical processors or cores. + SetActiveCpu(GetFakeCpuNumber(affinity)); +} + +uint8_t XThread::active_cpu() const { + const X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); + return pcr.current_cpu; +} + +void XThread::SetActiveCpu(uint8_t cpu_index) { + // May be called during thread creation - don't skip if current == new. + + assert_true(cpu_index < 6); + + X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); + pcr.current_cpu = cpu_index; + + if (is_guest_thread()) { + X_KTHREAD& thread_object = + *memory()->TranslateVirtual(guest_object()); + thread_object.current_cpu = cpu_index; + } + if (xe::threading::logical_processor_count() < 6) { XELOGW("Too few processors - scheduling will be wonky"); } - SetActiveCpu(GetFakeCpuNumber(affinity)); - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(affinity); - } -} - -uint32_t XThread::active_cpu() const { - uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); - return xe::load_and_swap(pcr + 0x10C); -} - -void XThread::SetActiveCpu(uint32_t cpu_index) { - assert_true(cpu_index < 6); - uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); - xe::store_and_swap(pcr + 0x10C, cpu_index); - - if (is_guest_thread()) { - X_KTHREAD* thread_object = - memory()->TranslateVirtual(guest_object()); - thread_object->current_cpu = cpu_index; + thread_->set_affinity_mask(uint64_t(1) << cpu_index); } } diff 
--git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index de813bb49..7ab55c686 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -166,9 +166,17 @@ class XThread : public XObject, public cpu::Thread { int32_t priority() const { return priority_; } int32_t QueryPriority(); void SetPriority(int32_t increment); + + // Xbox thread IDs: + // 0 - core 0, thread 0 - user + // 1 - core 0, thread 1 - user + // 2 - core 1, thread 0 - sometimes xcontent + // 3 - core 1, thread 1 - user + // 4 - core 2, thread 0 - xaudio + // 5 - core 2, thread 1 - user void SetAffinity(uint32_t affinity); - uint32_t active_cpu() const; - void SetActiveCpu(uint32_t cpu_index); + uint8_t active_cpu() const; + void SetActiveCpu(uint8_t cpu_index); bool GetTLSValue(uint32_t slot, uint32_t* value_out); bool SetTLSValue(uint32_t slot, uint32_t value); From f8d7652dc427dd1f819084fa1188c6d3d739c2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Gli=C5=84ski?= Date: Sat, 14 Nov 2020 17:30:56 +0100 Subject: [PATCH 05/12] [Kernel] Remove remaining SHIM usage from xam_net (#1671) --- src/xenia/kernel/xam/xam_net.cc | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/xenia/kernel/xam/xam_net.cc b/src/xenia/kernel/xam/xam_net.cc index 41ac0e3eb..a28b788e3 100644 --- a/src/xenia/kernel/xam/xam_net.cc +++ b/src/xenia/kernel/xam/xam_net.cc @@ -542,19 +542,13 @@ dword_result_t NetDll_XNetDnsRelease(dword_t caller, pointer_t dns) { } DECLARE_XAM_EXPORT1(NetDll_XNetDnsRelease, kNetworking, kStub); -SHIM_CALL NetDll_XNetQosServiceLookup_shim(PPCContext* ppc_context, - KernelState* kernel_state) { - uint32_t caller = SHIM_GET_ARG_32(0); - uint32_t zero = SHIM_GET_ARG_32(1); - uint32_t event_handle = SHIM_GET_ARG_32(2); - uint32_t out_ptr = SHIM_GET_ARG_32(3); - - XELOGD("NetDll_XNetQosServiceLookup({}, {}, {:08X}, {:08X})", caller, zero, - event_handle, out_ptr); - +dword_result_t NetDll_XNetQosServiceLookup(dword_t caller, 
dword_t zero, + dword_t event_handle, + lpdword_t out_ptr) { // Non-zero is error. - SHIM_SET_RETURN_32(1); + return 1; } +DECLARE_XAM_EXPORT1(NetDll_XNetQosServiceLookup, kNetworking, kStub); dword_result_t NetDll_XNetQosListen(dword_t caller, lpvoid_t id, lpvoid_t data, dword_t data_size, dword_t r7, @@ -965,9 +959,7 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle, DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented); void RegisterNetExports(xe::cpu::ExportResolver* export_resolver, - KernelState* kernel_state) { - SHIM_SET_MAPPING("xam.xex", NetDll_XNetQosServiceLookup, state); -} + KernelState* kernel_state) {} } // namespace xam } // namespace kernel From e348cacc6dd7e42a921bd598db5abaf9cf0b4d36 Mon Sep 17 00:00:00 2001 From: Sandy Carter Date: Mon, 15 Jul 2019 22:31:55 -0400 Subject: [PATCH 06/12] [debugging linux] Implement functions Check TracerPid in /proc/self/status for attached debugger. Add SIGTRAP handler to prevent signal from halting app while not running in a debugger. Log DebugPrint in clog (stderr). 
--- src/xenia/base/debugging_posix.cc | 42 ++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/xenia/base/debugging_posix.cc b/src/xenia/base/debugging_posix.cc index a9c08ed60..3b73ab12a 100644 --- a/src/xenia/base/debugging_posix.cc +++ b/src/xenia/base/debugging_posix.cc @@ -9,21 +9,51 @@ #include "xenia/base/debugging.h" -#include +#include #include +#include +#include +#include +#include #include "xenia/base/string_buffer.h" namespace xe { namespace debugging { -bool IsDebuggerAttached() { return false; } -void Break() { raise(SIGTRAP); } +bool IsDebuggerAttached() { + std::ifstream proc_status_stream("/proc/self/status"); + if (!proc_status_stream.is_open()) { + return false; + } + std::string line; + while (std::getline(proc_status_stream, line)) { + std::istringstream line_stream(line); + std::string key; + line_stream >> key; + if (key == "TracerPid:") { + uint32_t tracer_pid; + line_stream >> tracer_pid; + return tracer_pid != 0; + } + } + return false; +} + +void Break() { + static std::once_flag flag; + std::call_once(flag, []() { + // Install handler for sigtrap only once + std::signal(SIGTRAP, [](int) { + // Forward signal to default handler after being caught + std::signal(SIGTRAP, SIG_DFL); + }); + }); + std::raise(SIGTRAP); +} namespace internal { -void DebugPrint(const char* s) { - // TODO: proper implementation. -} +void DebugPrint(const char* s) { std::clog << s << std::endl; } } // namespace internal } // namespace debugging From 2a076c924f8802f3478b5fbc957834d49a657b29 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Tue, 3 Nov 2020 21:54:19 +0100 Subject: [PATCH 07/12] Refactor premake scripts. 
--- premake5.lua | 48 ++++++++++++++++--------------------- third_party/SDL2-static.lua | 2 +- third_party/spirv-tools.lua | 2 +- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/premake5.lua b/premake5.lua index 22f1fa7f1..7e9f590b2 100644 --- a/premake5.lua +++ b/premake5.lua @@ -24,6 +24,9 @@ defines({ "UNICODE", }) +cppdialect("C++17") +symbols("On") + -- TODO(DrChat): Find a way to disable this on other architectures. if ARCH ~= "ppc64" then filter("architecture:x86_64") @@ -44,30 +47,29 @@ filter("kind:StaticLib") filter("configurations:Checked") runtime("Debug") + optimize("Off") defines({ "DEBUG", }) - runtime("Debug") filter({"configurations:Checked", "platforms:Windows"}) buildoptions({ - "/RTCsu", -- Full Run-Time Checks. + "/RTCsu", -- Full Run-Time Checks. + }) +filter({"configurations:Checked", "platforms:Linux"}) + defines({ + "_GLIBCXX_DEBUG", -- libstdc++ debug mode }) filter("configurations:Debug") - runtime("Debug") + runtime("Release") + optimize("Off") defines({ "DEBUG", "_NO_DEBUG_HEAP=1", }) - runtime("Release") -filter({"configurations:Debug", "platforms:Windows"}) - linkoptions({ - "/NODEFAULTLIB:MSVCRTD", - }) - filter({"configurations:Debug", "platforms:Linux"}) - buildoptions({ - "-g", + defines({ + "_GLIBCXX_DEBUG", -- make dbg symbols work on some distros }) filter("configurations:Release") @@ -76,25 +78,18 @@ filter("configurations:Release") "NDEBUG", "_NO_DEBUG_HEAP=1", }) - optimize("speed") + optimize("Speed") inlining("Auto") floatingpoint("Fast") flags({ "LinkTimeOptimization", }) - runtime("Release") -filter({"configurations:Release", "platforms:Windows"}) - linkoptions({ - "/NODEFAULTLIB:MSVCRTD", - }) - filter("platforms:Linux") system("linux") toolset("clang") - cppdialect("C++17") buildoptions({ -- "-mlzcnt", -- (don't) Assume lzcnt is supported. 
- "`pkg-config --cflags gtk+-x11-3.0`", + ({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1], "-fno-lto", -- Premake doesn't support LTO on clang }) links({ @@ -105,14 +100,13 @@ filter("platforms:Linux") "rt", }) linkoptions({ - "`pkg-config --libs gtk+-3.0`", + ({os.outputof("pkg-config --libs gtk+-3.0")})[1], }) filter({"platforms:Linux", "kind:*App"}) linkgroups("On") filter({"platforms:Linux", "language:C++", "toolset:gcc"}) - cppdialect("C++17") links({ }) disablewarnings({ @@ -147,13 +141,11 @@ filter({"platforms:Linux", "language:C++", "toolset:clang", "files:*.cc or *.cpp filter("platforms:Windows") system("windows") toolset("msc") - cppdialect("C++17") buildoptions({ - "/MP", -- Multiprocessor compilation. "/utf-8", -- 'build correctly on systems with non-Latin codepages'. -- Mark warnings as severe - "/w14839", -- non-standard use of class 'type' as an argument to a variadic function - "/w14840", -- non-portable use of class 'type' as an argument to a variadic function + "/w14839", -- non-standard use of class 'type' as an argument to a variadic function + "/w14840", -- non-portable use of class 'type' as an argument to a variadic function -- Disable warnings "/wd4100", -- Unreferenced parameters are ok. "/wd4201", -- Nameless struct/unions are ok. @@ -163,10 +155,10 @@ filter("platforms:Windows") "/wd4189", -- 'local variable is initialized but not referenced'. }) flags({ - "NoMinimalRebuild", -- Required for /MP above. + "MultiProcessorCompile", -- Multiprocessor compilation. + "NoMinimalRebuild", -- Required for /MP above. 
}) - symbols("On") defines({ "_CRT_NONSTDC_NO_DEPRECATE", "_CRT_SECURE_NO_WARNINGS", diff --git a/third_party/SDL2-static.lua b/third_party/SDL2-static.lua index a9206e300..447ceb325 100644 --- a/third_party/SDL2-static.lua +++ b/third_party/SDL2-static.lua @@ -18,7 +18,7 @@ project("SDL2") "SDL2/include", }) buildoptions({ - "/wd4828", -- illegal characters in file + "/wd4828", -- illegal characters in file https://bugzilla.libsdl.org/show_bug.cgi?id=5333 }) files({ -- 1:1 from SDL.vcxproj file diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua index bf900a6e9..0e6335b98 100644 --- a/third_party/spirv-tools.lua +++ b/third_party/spirv-tools.lua @@ -73,4 +73,4 @@ project("spirv-tools") buildoptions({ "/wd4800", -- Forcing value to bool 'true' or 'false' "/wd4996", -- Call to 'std::equal' with parameters that may be unsafe - }) \ No newline at end of file + }) From 9dea6b3f62cbf50913180215d4b4b6a0ac4ac178 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Tue, 3 Nov 2020 21:59:03 +0100 Subject: [PATCH 08/12] Add premake cmake generator. `./build/CMakeLists.txt` is generated by `./xb.bat premake --devenv=cmake` and enables use of other IDEs like `CLion` for example. 
--- .gitmodules | 3 +++ premake5.lua | 1 + third_party/premake-cmake | 1 + xenia-build | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) create mode 160000 third_party/premake-cmake diff --git a/.gitmodules b/.gitmodules index 6c3ca7278..c8b4ef272 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "third_party/DirectXShaderCompiler"] path = third_party/DirectXShaderCompiler url = https://github.com/microsoft/DirectXShaderCompiler.git +[submodule "third_party/premake-cmake"] + path = third_party/premake-cmake + url = https://github.com/Enhex/premake-cmake.git diff --git a/premake5.lua b/premake5.lua index 7e9f590b2..2137515ba 100644 --- a/premake5.lua +++ b/premake5.lua @@ -1,5 +1,6 @@ include("tools/build") require("third_party/premake-export-compile-commands/export-compile-commands") +require("third_party/premake-cmake/cmake") location(build_root) targetdir(build_bin) diff --git a/third_party/premake-cmake b/third_party/premake-cmake new file mode 160000 index 000000000..26fbbb996 --- /dev/null +++ b/third_party/premake-cmake @@ -0,0 +1 @@ +Subproject commit 26fbbb9962aefcb1c24aff1e7952033ce1361190 diff --git a/xenia-build b/xenia-build index 081f36481..3b27e656f 100755 --- a/xenia-build +++ b/xenia-build @@ -372,9 +372,9 @@ def run_platform_premake(cc='clang', devenv=None): if 'VSVERSION' in os.environ: vs_version = os.environ['VSVERSION'] - return run_premake('windows', 'vs' + vs_version) + return run_premake('windows', devenv or ('vs' + vs_version)) else: - return run_premake('linux', devenv == 'codelite' and devenv or 'gmake2', cc) + return run_premake('linux', devenv or 'gmake2', cc) def run_premake_export_commands(): From 8b1ebe1130cad40e064920ec46fbb918b446ab56 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Wed, 4 Nov 2020 16:04:07 +0100 Subject: [PATCH 09/12] Premake: Reorder links to speed up building. - Re-enable LTO on clang. - Set AR on travis so it builds with LTO. 
--- .travis.yml | 8 ++++-- premake5.lua | 1 - src/xenia/app/premake5.lua | 28 +++++++++--------- src/xenia/gpu/d3d12/premake5.lua | 42 +++++++++++++++------------ src/xenia/gpu/vulkan/premake5.lua | 48 +++++++++++++++++-------------- src/xenia/hid/premake5.lua | 4 +-- 6 files changed, 71 insertions(+), 60 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7536f47a3..188278034 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,9 +28,9 @@ addons: jobs: include: - - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true - - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug - - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 LINT=true + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Debug + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Release git: # We handle submodules ourselves in xenia-build setup. @@ -40,8 +40,10 @@ before_script: - export LIBVULKAN_VERSION=1.1.70 - export CXX=$CXX_COMPILER - export CC=$C_COMPILER + - export AR=$AR_COMPILER # Dump useful info. - $CXX --version + - $AR_COMPILER --version - python3 --version - clang-format-9 --version - clang-format-9 -style=file -dump-config diff --git a/premake5.lua b/premake5.lua index 2137515ba..fac718955 100644 --- a/premake5.lua +++ b/premake5.lua @@ -91,7 +91,6 @@ filter("platforms:Linux") buildoptions({ -- "-mlzcnt", -- (don't) Assume lzcnt is supported. 
({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1], - "-fno-lto", -- Premake doesn't support LTO on clang }) links({ "stdc++fs", diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index ac3f48eb4..8d836ff43 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -8,19 +8,6 @@ project("xenia-app") targetname("xenia") language("C++") links({ - "aes_128", - "capstone", - "fmt", - "dxbc", - "discord-rpc", - "glslang-spirv", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", - "spirv-tools", - "volk", "xenia-app-discord", "xenia-apu", "xenia-apu-nop", @@ -42,6 +29,21 @@ project("xenia-app") "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "fmt", + "dxbc", + "discord-rpc", + "glslang-spirv", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "spirv-tools", + "volk", "xxhash", }) defines({ diff --git a/src/xenia/gpu/d3d12/premake5.lua b/src/xenia/gpu/d3d12/premake5.lua index 812e3cc85..afb18abaf 100644 --- a/src/xenia/gpu/d3d12/premake5.lua +++ b/src/xenia/gpu/d3d12/premake5.lua @@ -25,15 +25,6 @@ project("xenia-gpu-d3d12-trace-viewer") kind("WindowedApp") language("C++") links({ - "aes_128", - "capstone", - "dxbc", - "fmt", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", "xenia-apu", "xenia-apu-nop", "xenia-base", @@ -48,6 +39,17 @@ project("xenia-gpu-d3d12-trace-viewer") "xenia-ui", "xenia-ui-d3d12", "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "dxbc", + "fmt", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", "xxhash", }) files({ @@ -70,15 +72,6 @@ project("xenia-gpu-d3d12-trace-dump") kind("ConsoleApp") language("C++") links({ - "aes_128", - "capstone", - "dxbc", - "fmt", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", "xenia-apu", "xenia-apu-nop", "xenia-base", @@ -93,6 +86,17 @@ project("xenia-gpu-d3d12-trace-dump") "xenia-ui", "xenia-ui-d3d12", "xenia-vfs", + }) + links({ + "aes_128", 
+ "capstone", + "dxbc", + "fmt", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", "xxhash", }) files({ @@ -107,4 +111,4 @@ project("xenia-gpu-d3d12-trace-dump") "2>&1", "1>scratch/stdout-trace-dump.txt", }) - end \ No newline at end of file + end diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index fada8e143..c1437995f 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -30,17 +30,6 @@ project("xenia-gpu-vulkan-trace-viewer") kind("WindowedApp") language("C++") links({ - "aes_128", - "capstone", - "fmt", - "glslang-spirv", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", - "spirv-tools", - "volk", "xenia-apu", "xenia-apu-nop", "xenia-base", @@ -56,6 +45,19 @@ project("xenia-gpu-vulkan-trace-viewer") "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "fmt", + "glslang-spirv", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "spirv-tools", + "volk", "xxhash", }) defines({ @@ -97,17 +99,6 @@ project("xenia-gpu-vulkan-trace-dump") kind("ConsoleApp") language("C++") links({ - "aes_128", - "capstone", - "fmt", - "glslang-spirv", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", - "spirv-tools", - "volk", "xenia-apu", "xenia-apu-nop", "xenia-base", @@ -123,6 +114,19 @@ project("xenia-gpu-vulkan-trace-dump") "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "fmt", + "glslang-spirv", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "spirv-tools", + "volk", "xxhash", }) defines({ diff --git a/src/xenia/hid/premake5.lua b/src/xenia/hid/premake5.lua index 152887e2b..348e12371 100644 --- a/src/xenia/hid/premake5.lua +++ b/src/xenia/hid/premake5.lua @@ -41,11 +41,11 @@ project("xenia-hid-demo") filter("platforms:Linux") links({ + "SDL2", + "vulkan", "X11", "xcb", "X11-xcb", - "vulkan", - "SDL2", }) filter("platforms:Windows") From 
06214c544a237076c4fb4c8312841f78253ab00d Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Thu, 5 Nov 2020 11:52:48 +0100 Subject: [PATCH 10/12] [CPU] std::sort compare: satisfy comp(a,a)==false --- src/xenia/cpu/export_resolver.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/cpu/export_resolver.cc b/src/xenia/cpu/export_resolver.cc index ecc5d8246..b05df5d83 100644 --- a/src/xenia/cpu/export_resolver.cc +++ b/src/xenia/cpu/export_resolver.cc @@ -30,7 +30,7 @@ ExportResolver::Table::Table(const std::string_view module_name, } std::sort( exports_by_name_.begin(), exports_by_name_.end(), - [](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; }); + [](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; }); } ExportResolver::ExportResolver() = default; @@ -51,7 +51,7 @@ void ExportResolver::RegisterTable( } std::sort( all_exports_by_name_.begin(), all_exports_by_name_.end(), - [](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; }); + [](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; }); } Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name, From 9233f85c30231555d016efde7437892139438db5 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Thu, 5 Nov 2020 12:26:04 +0100 Subject: [PATCH 11/12] [docs] CMake generation. --- docs/building.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/building.md b/docs/building.md index 6aafc521e..2715d79a5 100644 --- a/docs/building.md +++ b/docs/building.md @@ -91,12 +91,14 @@ Linux support is extremely experimental and presently incomplete. The build script uses LLVM/Clang 9. GCC while it should work in theory, is not easily interchangeable right now. -[CodeLite](https://codelite.org) is the supported IDE and `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website. -Normal building via `xb build` uses Make. 
+* Normal building via `xb build` uses Make. +* [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website. +* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). `build/CMakeLists.txt` is generated by invoking `xb premake --devenv=cmake`. Clang-9 or newer should be available from system repositories on all up to date distributions. You will also need some development libraries. To get them on an Ubuntu system: -``` + +```bash sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev ``` From 171c97c9294460450b93ac635d225c2515e2c7e2 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Sun, 8 Nov 2020 22:28:36 +0100 Subject: [PATCH 12/12] Start CLion by invoking `xb devenv` when available --- docs/building.md | 2 +- xenia-build | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/docs/building.md b/docs/building.md index 2715d79a5..0a70fb206 100644 --- a/docs/building.md +++ b/docs/building.md @@ -93,7 +93,7 @@ interchangeable right now. * Normal building via `xb build` uses Make. * [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website. -* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). `build/CMakeLists.txt` is generated by invoking `xb premake --devenv=cmake`. +* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). If `clion` is available inside `$PATH`, `xb devenv` will start it. 
Otherwise `build/CMakeLists.txt` needs to be generated by invoking `xb premake --devenv=cmake` manually. Clang-9 or newer should be available from system repositories on all up to date distributions. You will also need some development libraries. To get them on an Ubuntu system: diff --git a/xenia-build b/xenia-build index 3b27e656f..89a14c651 100755 --- a/xenia-build +++ b/xenia-build @@ -88,6 +88,16 @@ def main(): sys.exit(return_code) +def print_box(msg): + """Prints an important message inside a box + """ + print( + '┌{0:─^{2}}╖\n' + '│{1: ^{2}}║\n' + '╘{0:═^{2}}╝\n' + .format('', msg, len(msg) + 2)) + + def import_vs_environment(): """Finds the installed Visual Studio version and imports interesting environment variables into os.environ. @@ -153,6 +163,7 @@ def import_subprocess_environment(args): os.environ[var.upper()] = setting break + def has_bin(binary): """Checks whether the given binary is present. @@ -408,6 +419,43 @@ def get_build_bin_path(args): return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize()) +def create_clion_workspace(): + """Creates some basic workspace information inside the .idea directory for first start. + """ + if os.path.exists('.idea'): + # No first start + return False + print('Generating CLion workspace files...') + # Might become easier in the future: https://youtrack.jetbrains.com/issue/CPP-7911 + + # Set the location of the CMakeLists.txt + os.mkdir('.idea') + with open(os.path.join('.idea', 'misc.xml'), 'w') as f: + f.write(""" + + + + + +""") + + # Set available configurations + # TODO Find a way to trigger a cmake reload + with open(os.path.join('.idea', 'workspace.xml'), 'w') as f: + f.write(""" + + + + + + + + +""") + + return True + + def discover_commands(subparsers): """Looks for all commands and returns a dictionary of them. In the future commands could be discovered on disk. 
@@ -1446,8 +1494,13 @@ class DevenvCommand(Command): def execute(self, args, pass_args, cwd): devenv = None + show_reload_prompt = False if sys.platform == 'win32': print('Launching Visual Studio...') + elif has_bin('clion') or has_bin('clion.sh'): + print('Launching CLion...') + show_reload_prompt = create_clion_workspace() + devenv = 'cmake' else: print('Launching CodeLite...') devenv = 'codelite' @@ -1458,11 +1511,23 @@ class DevenvCommand(Command): print('') print('- launching devenv...') + if show_reload_prompt: + print_box('Please run "File ⇒ ↺ Reload CMake Project" from inside the IDE!') if sys.platform == 'win32': shell_call([ 'devenv', 'build\\xenia.sln', ]) + elif has_bin('clion'): + shell_call([ + 'clion', + '.', + ]) + elif has_bin('clion.sh'): + shell_call([ + 'clion.sh', + '.', + ]) else: shell_call([ 'codelite',