[D3D12] Cleanup: pipeline state -> pipeline, other things

This commit is contained in:
Triang3l 2020-11-14 16:43:18 +03:00
parent 87a3c5fac2
commit 6b988d43c7
20 changed files with 475 additions and 490 deletions

View File

@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
sampler_count_vertex); sampler_count_vertex);
return nullptr; return nullptr;
} }
root_signatures_bindful_.insert({index, root_signature}); root_signatures_bindful_.emplace(index, root_signature);
return root_signature; return root_signature;
} }
@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions(
current_sample_positions_ = sample_positions; current_sample_positions_ = sample_positions;
} }
void D3D12CommandProcessor::SetComputePipelineState( void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
ID3D12PipelineState* pipeline_state) { if (current_external_pipeline_ != pipeline) {
if (current_external_pipeline_state_ != pipeline_state) { deferred_command_list_.D3DSetPipelineState(pipeline);
deferred_command_list_.D3DSetPipelineState(pipeline_state); current_external_pipeline_ = pipeline;
current_external_pipeline_state_ = pipeline_state; current_cached_pipeline_ = nullptr;
current_cached_pipeline_state_ = nullptr;
} }
} }
@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
} }
// Currently scaling is only supported with ROV. // Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) { if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - 2x"; return "Direct3D 12 - ROV 2x";
} }
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
// that faces adoption complications (outside of Direct3D - on Vulkan - at
// least), but crucial to Xenia - raise awareness of its usage.
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
// feature" - oscarbg in that issue.
return "Direct3D 12 - ROV";
} }
return "Direct3D 12"; return "Direct3D 12";
} }
@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() {
*this, *register_file_, bindless_resources_used_, edram_rov_used_, *this, *register_file_, bindless_resources_used_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1); texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) { if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache"); XELOGE("Failed to initialize the graphics pipeline cache");
return false; return false;
} }
@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() {
// Shut down binding - bindless descriptors may be owned by subsystems like // Shut down binding - bindless descriptors may be owned by subsystems like
// the texture cache. // the texture cache.
// Root signatured are used by pipeline states, thus freed after the pipeline // Root signatures are used by pipelines, thus freed after the pipelines.
// states.
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
for (auto it : root_signatures_bindful_) { for (auto it : root_signatures_bindful_) {
@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
xenos::VertexShaderExportMode::kMultipass || xenos::VertexShaderExportMode::kMultipass ||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front && (primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) { pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline state. // All faces are culled - can't be expressed in the pipeline.
return true; return true;
} }
@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0; line_loop_closing_index = 0;
} }
// Update the textures - this may bind pipeline state objects. // Update the textures - this may bind pipelines.
uint32_t used_texture_mask = uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() | vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
early_z = true; early_z = true;
} }
// Create the pipeline state object if needed and bind it. // Create the pipeline if needed and bind it.
void* pipeline_state_handle; void* pipeline_handle;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline( if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted, vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16, indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_state_handle, early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) { &root_signature)) {
return false; return false;
} }
if (current_cached_pipeline_state_ != pipeline_state_handle) { if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle( deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_state_handle)); reinterpret_cast<void*>(pipeline_handle));
current_cached_pipeline_state_ = pipeline_state_handle; current_cached_pipeline_ = pipeline_handle;
current_external_pipeline_state_ = nullptr; current_external_pipeline_ = nullptr;
} }
// Update viewport, scissor, blend factor and stencil reference. // Update viewport, scissor, blend factor and stencil reference.
@ -2005,14 +2011,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
} }
// Must not call anything that can change the descriptor heap from now on! // Must not call anything that can change the descriptor heap from now on!
// Ensure vertex and index buffers are resident and draw. // Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity will be tracked. // validity is tracked.
uint64_t vertex_buffers_resident[2] = {}; uint64_t vertex_buffers_resident[2] = {};
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { for (const Shader::VertexBinding& vertex_binding :
vertex_shader->vertex_bindings()) {
uint32_t vfetch_index = vertex_binding.fetch_constant; uint32_t vfetch_index = vertex_binding.fetch_constant;
if (vertex_buffers_resident[vfetch_index >> 6] & if (vertex_buffers_resident[vfetch_index >> 6] &
(1ull << (vfetch_index & 63))) { (uint64_t(1) << (vfetch_index & 63))) {
continue; continue;
} }
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
@ -2045,7 +2052,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
vfetch_constant.address << 2, vfetch_constant.size << 2); vfetch_constant.address << 2, vfetch_constant.size << 2);
return false; return false;
} }
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
} }
// Gather memexport ranges and ensure the heaps for them are resident, and // Gather memexport ranges and ensure the heaps for them are resident, and
@ -2517,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true; submission_open_ = true;
// Start a new deferred command list - will submit it to the real one in the // Start a new deferred command list - will submit it to the real one in the
// end of the submission (when async pipeline state object creation requests // end of the submission (when async pipeline creation requests are
// are fulfilled). // fulfilled).
deferred_command_list_.Reset(); deferred_command_list_.Reset();
// Reset cached state of the command list. // Reset cached state of the command list.
@ -2527,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
ff_blend_factor_update_needed_ = true; ff_blend_factor_update_needed_ = true;
ff_stencil_ref_update_needed_ = true; ff_stencil_ref_update_needed_ = true;
current_sample_positions_ = xenos::MsaaSamples::k1X; current_sample_positions_ = xenos::MsaaSamples::k1X;
current_cached_pipeline_state_ = nullptr; current_cached_pipeline_ = nullptr;
current_external_pipeline_state_ = nullptr; current_external_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr; current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0; current_graphics_root_up_to_date_ = 0;
if (bindless_resources_used_) { if (bindless_resources_used_) {
@ -2724,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
} }
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const { bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates(); return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
} }
void D3D12CommandProcessor::ClearCommandAllocatorCache() { void D3D12CommandProcessor::ClearCommandAllocatorCache() {
@ -2745,12 +2753,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
} }
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Window parameters. // Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow: // See r200UpdateWindow:
@ -2846,14 +2854,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
scissor.right = pa_sc_window_scissor_br.br_x; scissor.right = pa_sc_window_scissor_br.br_x;
scissor.bottom = pa_sc_window_scissor_br.br_y; scissor.bottom = pa_sc_window_scissor_br.br_y;
if (!pa_sc_window_scissor_tl.window_offset_disable) { if (!pa_sc_window_scissor_tl.window_offset_disable) {
scissor.left = scissor.left = std::max(
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.top = scissor.top = std::max(
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
scissor.right = scissor.right = std::max(
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.bottom = scissor.bottom = std::max(
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
} }
scissor.left *= pixel_size_x; scissor.left *= pixel_size_x;
scissor.top *= pixel_size_y; scissor.top *= pixel_size_y;
@ -2915,12 +2923,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t line_loop_closing_index, xenos::Endian index_endian, uint32_t line_loop_closing_index, xenos::Endian index_endian,
uint32_t used_texture_mask, bool early_z, uint32_t color_mask, uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) { const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>(); auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>(); auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>(); auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
@ -3103,14 +3110,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index; dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
system_constants_.line_loop_closing_index = line_loop_closing_index; system_constants_.line_loop_closing_index = line_loop_closing_index;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// Index or tessellation edge factor buffer endianness. // Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian; dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian; system_constants_.vertex_index_endian = index_endian;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE. // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!pa_cl_clip_cntl.clip_disable) { if (!pa_cl_clip_cntl.clip_disable) {
for (uint32_t i = 0; i < 6; ++i) { for (uint32_t i = 0; i < 6; ++i) {
@ -3574,7 +3581,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_vertex.float_bitmap[i]; float_constant_map_vertex.float_bitmap[i];
// If no float constants at all, we can reuse any buffer for them, so not // If no float constants at all, we can reuse any buffer for them, so not
// invalidating. // invalidating.
if (float_constant_map_vertex.float_count != 0) { if (float_constant_count_vertex) {
cbuffer_binding_float_vertex_.up_to_date = false; cbuffer_binding_float_vertex_.up_to_date = false;
} }
} }
@ -3589,7 +3596,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_pixel.float_bitmap[i]) { float_constant_map_pixel.float_bitmap[i]) {
current_float_constant_map_pixel_[i] = current_float_constant_map_pixel_[i] =
float_constant_map_pixel.float_bitmap[i]; float_constant_map_pixel.float_bitmap[i];
if (float_constant_map_pixel.float_count != 0) { if (float_constant_count_pixel) {
cbuffer_binding_float_pixel_.up_to_date = false; cbuffer_binding_float_pixel_.up_to_date = false;
} }
} }
@ -3889,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters, sampler_parameters,
provider.OffsetSamplerDescriptor( provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index)); sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert( texture_cache_bindless_sampler_map_.emplace(
{sampler_parameters.value, sampler_index}); sampler_parameters.value, sampler_index);
} }
current_sampler_bindless_indices_vertex_[j] = sampler_index; current_sampler_bindless_indices_vertex_[j] = sampler_index;
} }
@ -3921,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters, sampler_parameters,
provider.OffsetSamplerDescriptor( provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index)); sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert( texture_cache_bindless_sampler_map_.emplace(
{sampler_parameters.value, sampler_index}); sampler_parameters.value, sampler_index);
} }
current_sampler_bindless_indices_pixel_[j] = sampler_index; current_sampler_bindless_indices_pixel_[j] = sampler_index;
} }

View File

@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets. // render targets or copying to depth render targets.
void SetSamplePositions(xenos::MsaaSamples sample_positions); void SetSamplePositions(xenos::MsaaSamples sample_positions);
// Returns a pipeline state object with deferred creation by its handle. May // Returns a pipeline with deferred creation by its handle. May return nullptr
// return nullptr if failed to create the pipeline state object. // if failed to create the pipeline.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
void* handle) const { return pipeline_cache_->GetD3D12PipelineByHandle(handle);
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
} }
// Sets the current pipeline state to a compute one. This is for cache // Sets the current pipeline to a compute one. This is for cache invalidation
// invalidation primarily. A submission must be open. // primarily. A submission must be open.
void SetComputePipelineState(ID3D12PipelineState* pipeline_state); void SetComputePipeline(ID3D12PipelineState* pipeline);
// For the pipeline state cache to call when binding layout UIDs may be // For the pipeline cache to call when binding layout UIDs may be reused.
// reused.
void NotifyShaderBindingsLayoutUIDsInvalidated(); void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title. // Returns the text to display in the GPU backend name in the window title.
@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndSubmission(bool is_swap); bool EndSubmission(bool is_swap);
// Checks if ending a submission right now would not cause potentially more // Checks if ending a submission right now would not cause potentially more
// delay than it would reduce by making the GPU start working earlier - such // delay than it would reduce by making the GPU start working earlier - such
// as when there are unfinished graphics pipeline state creation requests that // as when there are unfinished graphics pipeline creation requests that would
// would need to be fulfilled before actually submitting the command list. // need to be fulfilled before actually submitting the command list.
bool CanEndSubmissionImmediately() const; bool CanEndSubmissionImmediately() const;
bool AwaitAllQueueOperationsCompletion() { bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(submission_current_); CheckSubmissionFence(submission_current_);
@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Current SSAA sample positions (to be updated by the render target cache). // Current SSAA sample positions (to be updated by the render target cache).
xenos::MsaaSamples current_sample_positions_; xenos::MsaaSamples current_sample_positions_;
// Currently bound pipeline state, either a graphics pipeline state object // Currently bound pipeline, either a graphics pipeline from the pipeline
// from the pipeline state cache (with potentially deferred creation - // cache (with potentially deferred creation - current_external_pipeline_ is
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos // nullptr in this case) or a non-Xenos graphics or compute pipeline
// graphics or compute pipeline state object (current_cached_pipeline_state_ // (current_cached_pipeline_ is nullptr in this case).
// is nullptr in this case). void* current_cached_pipeline_;
void* current_cached_pipeline_state_; ID3D12PipelineState* current_external_pipeline_;
ID3D12PipelineState* current_external_pipeline_state_;
// Currently bound graphics root signature. // Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_; ID3D12RootSignature* current_graphics_root_signature_;

View File

@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
stretch_pipeline_desc.SampleDesc.Count = 1; stretch_pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) { &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
XELOGE("Failed to create the front buffer stretch pipeline state"); XELOGE("Failed to create the front buffer stretch pipeline");
stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_->Release();
stretch_gamma_root_signature_ = nullptr; stretch_gamma_root_signature_ = nullptr;
stretch_root_signature_->Release(); stretch_root_signature_->Release();
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) { &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
XELOGE( XELOGE(
"Failed to create the gamma-correcting front buffer stretch " "Failed to create the gamma-correcting front buffer stretch pipeline");
"pipeline state");
stretch_pipeline_->Release(); stretch_pipeline_->Release();
stretch_pipeline_ = nullptr; stretch_pipeline_ = nullptr;
stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_->Release();

View File

@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data(); return sampler_bindings_.data();
} }
// For owning subsystems like the pipeline state cache, accessors for unique // For owning subsystems like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen) // identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an // of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound. // incompatible layout was bound.

View File

@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
} }
} break; } break;
case Command::kSetPipelineStateHandle: { case Command::kSetPipelineStateHandle: {
current_pipeline_state = current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
command_processor_.GetD3D12PipelineStateByHandle( *reinterpret_cast<void* const*>(stream));
*reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) { if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state); command_list->SetPipelineState(current_pipeline_state);
} }

View File

@ -43,10 +43,10 @@ DEFINE_bool(
"D3D12"); "D3D12");
DEFINE_int32( DEFINE_int32(
d3d12_pipeline_creation_threads, -1, d3d12_pipeline_creation_threads, -1,
"Number of threads used for graphics pipeline state object creation. -1 to " "Number of threads used for graphics pipeline creation. -1 to calculate "
"calculate automatically (75% of logical CPU cores), a positive number to " "automatically (75% of logical CPU cores), a positive number to specify "
"specify the number of threads explicitly (up to the number of logical CPU " "the number of threads explicitly (up to the number of logical CPU cores), "
"cores), 0 to disable multithreaded pipeline state object creation.", "0 to disable multithreaded pipeline creation.",
"D3D12"); "D3D12");
DEFINE_bool(d3d12_tessellation_wireframe, false, DEFINE_bool(d3d12_tessellation_wireframe, false,
"Display tessellated surfaces as wireframe for debugging.", "Display tessellated surfaces as wireframe for debugging.",
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
logical_processor_count = 6; logical_processor_count = 6;
} }
// Initialize creation thread synchronization data even if not using creation // Initialize creation thread synchronization data even if not using creation
// threads because they may be used anyway to create pipeline state objects // threads because they may be used anyway to create pipelines from the
// from the storage. // storage.
creation_threads_busy_ = 0; creation_threads_busy_ = 0;
creation_completion_event_ = creation_completion_event_ =
xe::threading::Event::CreateManualResetEvent(true); xe::threading::Event::CreateManualResetEvent(true);
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
for (size_t i = 0; i < creation_thread_count; ++i) { for (size_t i = 0; i < creation_thread_count; ++i) {
std::unique_ptr<xe::threading::Thread> creation_thread = std::unique_ptr<xe::threading::Thread> creation_thread =
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); }); xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
creation_thread->set_name("D3D12 Pipeline States"); creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread)); creation_threads_.push_back(std::move(creation_thread));
} }
} }
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
} }
ShutdownShaderStorage(); ShutdownShaderStorage();
// Remove references to the current pipeline state object. // Remove references to the current pipeline.
current_pipeline_state_ = nullptr; current_pipeline_ = nullptr;
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Empty the pipeline state object creation queue and make sure there are no // Empty the pipeline creation queue and make sure there are no threads
// threads currently creating pipeline state objects because pipeline states // currently creating pipelines because pipelines are going to be deleted.
// are going to be deleted.
bool await_creation_completion_event = false; bool await_creation_completion_event = false;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
} }
} }
// Destroy all pipeline state objects. // Destroy all pipelines.
for (auto it : pipeline_states_) { for (auto it : pipelines_) {
it.second->state->Release(); it.second->state->Release();
delete it.second; delete it.second;
} }
pipeline_states_.clear(); pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0); COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all shaders. // Destroy all shaders.
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated(); command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
} }
texture_binding_layout_map_.clear(); texture_binding_layout_map_.clear();
texture_binding_layouts_.clear(); texture_binding_layouts_.clear();
for (auto it : shader_map_) { for (auto it : shaders_) {
delete it.second; delete it.second;
} }
shader_map_.clear(); shaders_.clear();
if (reinitialize_shader_storage) { if (reinitialize_shader_storage) {
InitializeShaderStorage(shader_storage_root, shader_storage_title_id, InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage(
} }
size_t ucode_byte_count = size_t ucode_byte_count =
shader_header.ucode_dword_count * sizeof(uint32_t); shader_header.ucode_dword_count * sizeof(uint32_t);
if (shader_map_.find(shader_header.ucode_data_hash) != if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
shader_map_.end()) {
// Already added - usually shaders aren't added without the intention of // Already added - usually shaders aren't added without the intention of
// translating them imminently, so don't do additional checks to // translating them imminently, so don't do additional checks to
// actually ensure that translation happens right now (they would cause // actually ensure that translation happens right now (they would cause
@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader = D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash, new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count); ucode_dwords.data(), shader_header.ucode_dword_count);
shader_map_.insert({ucode_data_hash, shader}); shaders_.emplace(ucode_data_hash, shader);
// Create new threads if the currently existing threads can't keep up with // Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus // file reading, but not more than the number of logical processors minus
// one. // one.
@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage(
} }
shader_translation_threads.clear(); shader_translation_threads.clear();
for (D3D12Shader* shader : shaders_failed_to_translate) { for (D3D12Shader* shader : shaders_failed_to_translate) {
shader_map_.erase(shader->ucode_data_hash()); shaders_.erase(shader->ucode_data_hash());
delete shader; delete shader;
} }
} }
@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
} }
// 'DXRO' or 'DXRT'. // 'DXRO' or 'DXRT'.
const uint32_t pipeline_state_storage_magic_api = const uint32_t pipeline_storage_magic_api =
edram_rov_used_ ? 0x4F525844 : 0x54525844; edram_rov_used_ ? 0x4F525844 : 0x54525844;
// Initialize the pipeline state storage stream. // Initialize the pipeline storage stream.
uint64_t pipeline_state_storage_initialization_start_ = uint64_t pipeline_storage_initialization_start_ =
xe::Clock::QueryHostTickCount(); xe::Clock::QueryHostTickCount();
auto pipeline_state_storage_file_path = auto pipeline_storage_file_path =
shader_storage_shareable_root / shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id, fmt::format("{:08X}.{}.d3d12.xpso", title_id,
edram_rov_used_ ? "rov" : "rtv"); edram_rov_used_ ? "rov" : "rtv");
pipeline_state_storage_file_ = pipeline_storage_file_ =
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b"); xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
if (!pipeline_state_storage_file_) { if (!pipeline_storage_file_) {
XELOGE( XELOGE(
"Failed to open the Direct3D 12 pipeline state description storage " "Failed to open the Direct3D 12 pipeline description storage file for "
"file for writing, persistent shader storage will be disabled: {}", "writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_state_storage_file_path)); xe::path_to_utf8(pipeline_storage_file_path));
fclose(shader_storage_file_); fclose(shader_storage_file_);
shader_storage_file_ = nullptr; shader_storage_file_ = nullptr;
return; return;
} }
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
// 'XEPS'. // 'XEPS'.
const uint32_t pipeline_state_storage_magic = 0x53504558; const uint32_t pipeline_storage_magic = 0x53504558;
struct { struct {
uint32_t magic; uint32_t magic;
uint32_t magic_api; uint32_t magic_api;
uint32_t version_swapped; uint32_t version_swapped;
} pipeline_state_storage_file_header; } pipeline_storage_file_header;
if (fread(&pipeline_state_storage_file_header, if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
sizeof(pipeline_state_storage_file_header), 1, 1, pipeline_storage_file_) &&
pipeline_state_storage_file_) && pipeline_storage_file_header.magic == pipeline_storage_magic &&
pipeline_state_storage_file_header.magic == pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
pipeline_state_storage_magic && xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
pipeline_state_storage_file_header.magic_api ==
pipeline_state_storage_magic_api &&
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
PipelineDescription::kVersion) { PipelineDescription::kVersion) {
uint64_t pipeline_state_storage_valid_bytes = uint64_t pipeline_storage_valid_bytes =
sizeof(pipeline_state_storage_file_header); sizeof(pipeline_storage_file_header);
// Enqueue pipeline state descriptions written by previous Xenia executions // Enqueue pipeline descriptions written by previous Xenia executions until
// until the end of the file or until a corrupted one is detected. // the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END); xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
int64_t pipeline_state_storage_told_end = int64_t pipeline_storage_told_end =
xe::filesystem::Tell(pipeline_state_storage_file_); xe::filesystem::Tell(pipeline_storage_file_);
size_t pipeline_state_storage_told_count = size_t pipeline_storage_told_count = size_t(
size_t(pipeline_state_storage_told_end >= pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
int64_t(pipeline_state_storage_valid_bytes) ? (uint64_t(pipeline_storage_told_end) -
? (uint64_t(pipeline_state_storage_told_end) - pipeline_storage_valid_bytes) /
pipeline_state_storage_valid_bytes) / sizeof(PipelineStoredDescription)
sizeof(PipelineStoredDescription) : 0);
: 0); if (pipeline_storage_told_count &&
if (pipeline_state_storage_told_count && xe::filesystem::Seek(pipeline_storage_file_,
xe::filesystem::Seek(pipeline_state_storage_file_, int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
int64_t(pipeline_state_storage_valid_bytes),
SEEK_SET)) {
std::vector<PipelineStoredDescription> pipeline_stored_descriptions; std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count); pipeline_stored_descriptions.resize(pipeline_storage_told_count);
pipeline_stored_descriptions.resize(fread( pipeline_stored_descriptions.resize(
pipeline_stored_descriptions.data(), fread(pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count, sizeof(PipelineStoredDescription), pipeline_storage_told_count,
pipeline_state_storage_file_)); pipeline_storage_file_));
if (!pipeline_stored_descriptions.empty()) { if (!pipeline_stored_descriptions.empty()) {
// Launch additional creation threads to use all cores to create // Launch additional creation threads to use all cores to create
// pipeline state objects faster. Will also be using the main thread, so // pipelines faster. Will also be using the main thread, so minus 1.
// minus 1.
size_t creation_thread_original_count = creation_threads_.size(); size_t creation_thread_original_count = creation_threads_.size();
size_t creation_thread_needed_count = size_t creation_thread_needed_count =
std::max(std::min(pipeline_stored_descriptions.size(), std::max(std::min(pipeline_stored_descriptions.size(),
@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
{}, [this, creation_thread_index]() { {}, [this, creation_thread_index]() {
CreationThread(creation_thread_index); CreationThread(creation_thread_index);
}); });
creation_thread->set_name("D3D12 Pipeline States Additional"); creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread)); creation_threads_.push_back(std::move(creation_thread));
} }
size_t pipeline_states_created = 0; size_t pipelines_created = 0;
for (const PipelineStoredDescription& pipeline_stored_description : for (const PipelineStoredDescription& pipeline_stored_description :
pipeline_stored_descriptions) { pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description = const PipelineDescription& pipeline_description =
@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage(
0) != pipeline_stored_description.description_hash) { 0) != pipeline_stored_description.description_hash) {
break; break;
} }
pipeline_state_storage_valid_bytes += pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
sizeof(PipelineStoredDescription); // Skip already known pipelines - those have already been enqueued.
// Skip already known pipeline states - those have already been auto found_range = pipelines_.equal_range(
// enqueued.
auto found_range = pipeline_states_.equal_range(
pipeline_stored_description.description_hash); pipeline_stored_description.description_hash);
bool pipeline_state_found = false; bool pipeline_found = false;
for (auto it = found_range.first; it != found_range.second; ++it) { for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second; Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline_state->description.description, if (!std::memcmp(&found_pipeline->description.description,
&pipeline_description, &pipeline_description,
sizeof(pipeline_description))) { sizeof(pipeline_description))) {
pipeline_state_found = true; pipeline_found = true;
break; break;
} }
} }
if (pipeline_state_found) { if (pipeline_found) {
continue; continue;
} }
PipelineRuntimeDescription pipeline_runtime_description; PipelineRuntimeDescription pipeline_runtime_description;
auto vertex_shader_it = auto vertex_shader_it =
shader_map_.find(pipeline_description.vertex_shader_hash); shaders_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shader_map_.end()) { if (vertex_shader_it == shaders_.end()) {
continue; continue;
} }
pipeline_runtime_description.vertex_shader = vertex_shader_it->second; pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage(
} }
if (pipeline_description.pixel_shader_hash) { if (pipeline_description.pixel_shader_hash) {
auto pixel_shader_it = auto pixel_shader_it =
shader_map_.find(pipeline_description.pixel_shader_hash); shaders_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shader_map_.end()) { if (pixel_shader_it == shaders_.end()) {
continue; continue;
} }
pipeline_runtime_description.pixel_shader = pixel_shader_it->second; pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
std::memcpy(&pipeline_runtime_description.description, std::memcpy(&pipeline_runtime_description.description,
&pipeline_description, sizeof(pipeline_description)); &pipeline_description, sizeof(pipeline_description));
PipelineState* new_pipeline_state = new PipelineState; Pipeline* new_pipeline = new Pipeline;
new_pipeline_state->state = nullptr; new_pipeline->state = nullptr;
std::memcpy(&new_pipeline_state->description, std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
&pipeline_runtime_description,
sizeof(pipeline_runtime_description)); sizeof(pipeline_runtime_description));
pipeline_states_.insert( pipelines_.emplace(pipeline_stored_description.description_hash,
std::make_pair(pipeline_stored_description.description_hash, new_pipeline);
new_pipeline_state)); COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Submit the pipeline for creation to any available thread. // Submit the pipeline for creation to any available thread.
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state); creation_queue_.push_back(new_pipeline);
} }
creation_request_cond_.notify_one(); creation_request_cond_.notify_one();
} else { } else {
new_pipeline_state->state = new_pipeline->state =
CreateD3D12PipelineState(pipeline_runtime_description); CreateD3D12Pipeline(pipeline_runtime_description);
} }
++pipeline_states_created; ++pipelines_created;
} }
CreateQueuedPipelineStatesOnProcessorThread(); CreateQueuedPipelinesOnProcessorThread();
if (creation_threads_.size() > creation_thread_original_count) { if (creation_threads_.size() > creation_thread_original_count) {
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_threads_shutdown_from_ = creation_thread_original_count; creation_threads_shutdown_from_ = creation_thread_original_count;
// Assuming the queue is empty because of // Assuming the queue is empty because of
// CreateQueuedPipelineStatesOnProcessorThread. // CreateQueuedPipelinesOnProcessorThread.
} }
creation_request_cond_.notify_all(); creation_request_cond_.notify_all();
while (creation_threads_.size() > creation_thread_original_count) { while (creation_threads_.size() > creation_thread_original_count) {
@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
} }
} }
XELOGGPU( XELOGGPU(
"Created {} graphics pipeline state objects from the storage in {} " "Created {} graphics pipelines from the storage in {} milliseconds",
"milliseconds", pipelines_created,
pipeline_states_created,
(xe::Clock::QueryHostTickCount() - (xe::Clock::QueryHostTickCount() -
pipeline_state_storage_initialization_start_) * pipeline_storage_initialization_start_) *
1000 / xe::Clock::QueryHostTickFrequency()); 1000 / xe::Clock::QueryHostTickFrequency());
} }
} }
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
pipeline_state_storage_valid_bytes); pipeline_storage_valid_bytes);
} else { } else {
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0); xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic; pipeline_storage_file_header.magic = pipeline_storage_magic;
pipeline_state_storage_file_header.magic_api = pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_state_storage_magic_api; pipeline_storage_file_header.version_swapped =
pipeline_state_storage_file_header.version_swapped =
xe::byte_swap(PipelineDescription::kVersion); xe::byte_swap(PipelineDescription::kVersion);
fwrite(&pipeline_state_storage_file_header, fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
sizeof(pipeline_state_storage_file_header), 1, 1, pipeline_storage_file_);
pipeline_state_storage_file_);
} }
shader_storage_root_ = storage_root; shader_storage_root_ = storage_root;
@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
// Start the storage writing thread. // Start the storage writing thread.
storage_write_flush_shaders_ = false; storage_write_flush_shaders_ = false;
storage_write_flush_pipeline_states_ = false; storage_write_flush_pipelines_ = false;
storage_write_thread_shutdown_ = false; storage_write_thread_shutdown_ = false;
storage_write_thread_ = storage_write_thread_ =
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); }); xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
storage_write_thread_.reset(); storage_write_thread_.reset();
} }
storage_write_shader_queue_.clear(); storage_write_shader_queue_.clear();
storage_write_pipeline_state_queue_.clear(); storage_write_pipeline_queue_.clear();
if (pipeline_state_storage_file_) { if (pipeline_storage_file_) {
fclose(pipeline_state_storage_file_); fclose(pipeline_storage_file_);
pipeline_state_storage_file_ = nullptr; pipeline_storage_file_ = nullptr;
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
} }
if (shader_storage_file_) { if (shader_storage_file_) {
@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
void PipelineCache::EndSubmission() { void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ || if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) { pipeline_storage_file_flush_needed_) {
{ {
std::lock_guard<std::mutex> lock(storage_write_request_lock_); std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) { if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true; storage_write_flush_shaders_ = true;
} }
if (pipeline_state_storage_file_flush_needed_) { if (pipeline_storage_file_flush_needed_) {
storage_write_flush_pipeline_states_ = true; storage_write_flush_pipelines_ = true;
} }
} }
storage_write_request_cond_.notify_one(); storage_write_request_cond_.notify_one();
shader_storage_file_flush_needed_ = false; shader_storage_file_flush_needed_ = false;
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
} }
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
CreateQueuedPipelineStatesOnProcessorThread(); CreateQueuedPipelinesOnProcessorThread();
// Await creation of all queued pipeline state objects. // Await creation of all queued pipelines.
bool await_creation_completion_event; bool await_creation_completion_event;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
// Assuming the creation queue is already empty (because the processor // Assuming the creation queue is already empty (because the processor
// thread also worked on creating the leftover pipeline state objects), so // thread also worked on creating the leftover pipelines), so only check
// only check if there are threads with pipeline state objects currently // if there are threads with pipelines currently being created.
// being created.
await_creation_completion_event = creation_threads_busy_ != 0; await_creation_completion_event = creation_threads_busy_ != 0;
if (await_creation_completion_event) { if (await_creation_completion_event) {
creation_completion_event_->Reset(); creation_completion_event_->Reset();
@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() {
} }
} }
bool PipelineCache::IsCreatingPipelineStates() { bool PipelineCache::IsCreatingPipelines() {
if (creation_threads_.empty()) { if (creation_threads_.empty()) {
return false; return false;
} }
@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
uint32_t dword_count) { uint32_t dword_count) {
// Hash the input memory and lookup the shader. // Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(data_hash); auto it = shaders_.find(data_hash);
if (it != shader_map_.end()) { if (it != shaders_.end()) {
// Shader has been previously loaded. // Shader has been previously loaded.
return it->second; return it->second;
} }
@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again. // again.
D3D12Shader* shader = D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count); new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shader_map_.insert({data_hash, shader}); shaders_.emplace(data_hash, shader);
return shader; return shader;
} }
@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
const { const {
// If the values this functions returns are changed, INVALIDATE THE SHADER // If the values this functions returns are changed, INVALIDATE THE SHADER
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The // STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
// exception is when the function originally returned "unsupported", but // is when the function originally returned "unsupported", but started to
// started to return a valid value (in this case the shader wouldn't be cached // return a valid value (in this case the shader wouldn't be cached in the
// in the first place). Otherwise games will not be able to locate shaders for // first place). Otherwise games will not be able to locate shaders for draws
// draws for which the host vertex shader type has changed! // for which the host vertex shader type has changed!
const auto& regs = register_file_; const auto& regs = register_file_;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>(); auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out, void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
ID3D12RootSignature** root_signature_out) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_state_handle_out); assert_not_null(pipeline_handle_out);
assert_not_null(root_signature_out); assert_not_null(root_signature_out);
PipelineRuntimeDescription runtime_description; PipelineRuntimeDescription runtime_description;
@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
} }
PipelineDescription& description = runtime_description.description; PipelineDescription& description = runtime_description.description;
if (current_pipeline_state_ != nullptr && if (current_pipeline_ != nullptr &&
!std::memcmp(&current_pipeline_state_->description.description, !std::memcmp(&current_pipeline_->description.description, &description,
&description, sizeof(description))) { sizeof(description))) {
*pipeline_state_handle_out = current_pipeline_state_; *pipeline_handle_out = current_pipeline_;
*root_signature_out = runtime_description.root_signature; *root_signature_out = runtime_description.root_signature;
return true; return true;
} }
// Find an existing pipeline state object in the cache. // Find an existing pipeline in the cache.
uint64_t hash = XXH64(&description, sizeof(description), 0); uint64_t hash = XXH64(&description, sizeof(description), 0);
auto found_range = pipeline_states_.equal_range(hash); auto found_range = pipelines_.equal_range(hash);
for (auto it = found_range.first; it != found_range.second; ++it) { for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second; Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline_state->description.description, if (!std::memcmp(&found_pipeline->description.description, &description,
&description, sizeof(description))) { sizeof(description))) {
current_pipeline_state_ = found_pipeline_state; current_pipeline_ = found_pipeline;
*pipeline_state_handle_out = found_pipeline_state; *pipeline_handle_out = found_pipeline;
*root_signature_out = found_pipeline_state->description.root_signature; *root_signature_out = found_pipeline->description.root_signature;
return true; return true;
} }
} }
@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
return false; return false;
} }
PipelineState* new_pipeline_state = new PipelineState; Pipeline* new_pipeline = new Pipeline;
new_pipeline_state->state = nullptr; new_pipeline->state = nullptr;
std::memcpy(&new_pipeline_state->description, &runtime_description, std::memcpy(&new_pipeline->description, &runtime_description,
sizeof(runtime_description)); sizeof(runtime_description));
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state)); pipelines_.emplace(hash, new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
pipeline_states_.size());
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Submit the pipeline state object for creation to any available thread. // Submit the pipeline for creation to any available thread.
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state); creation_queue_.push_back(new_pipeline);
} }
creation_request_cond_.notify_one(); creation_request_cond_.notify_one();
} else { } else {
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description); new_pipeline->state = CreateD3D12Pipeline(runtime_description);
} }
if (pipeline_state_storage_file_) { if (pipeline_storage_file_) {
assert_not_null(storage_write_thread_); assert_not_null(storage_write_thread_);
pipeline_state_storage_file_flush_needed_ = true; pipeline_storage_file_flush_needed_ = true;
{ {
std::lock_guard<std::mutex> lock(storage_write_request_lock_); std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_pipeline_state_queue_.emplace_back(); storage_write_pipeline_queue_.emplace_back();
PipelineStoredDescription& stored_description = PipelineStoredDescription& stored_description =
storage_write_pipeline_state_queue_.back(); storage_write_pipeline_queue_.back();
stored_description.description_hash = hash; stored_description.description_hash = hash;
std::memcpy(&stored_description.description, &description, std::memcpy(&stored_description.description, &description,
sizeof(description)); sizeof(description));
@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
storage_write_request_cond_.notify_all(); storage_write_request_cond_.notify_all();
} }
current_pipeline_state_ = new_pipeline_state; current_pipeline_ = new_pipeline;
*pipeline_state_handle_out = new_pipeline_state; *pipeline_handle_out = new_pipeline;
*root_signature_out = runtime_description.root_signature; *root_signature_out = runtime_description.root_signature;
return true; return true;
} }
@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader(
std::memcpy( std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset, texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes); texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert( texture_binding_layout_map_.emplace(texture_binding_layout_hash,
{texture_binding_layout_hash, new_uid}); new_uid);
} }
} }
if (bindless_sampler_count) { if (bindless_sampler_count) {
@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader(
vector_bindless_sampler_layout[i] = vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index; sampler_bindings[i].bindless_descriptor_index;
} }
bindless_sampler_layout_map_.insert( bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
{bindless_sampler_layout_hash, new_uid}); new_uid);
} }
} }
} }
@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat, /* 16 */ PipelineBlendFactor::kSrcAlphaSat,
}; };
// Like kBlendFactorMap, but with color modes changed to alpha. Some // Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipeline state objects aren't created in Prey because a color mode is // pipelines aren't created in Prey because a color mode is used for alpha.
// used for alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = { static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero, /* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne, /* 1 */ PipelineBlendFactor::kOne,
@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
return true; return true;
} }
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description) { const PipelineRuntimeDescription& runtime_description) {
const PipelineDescription& description = runtime_description.description; const PipelineDescription& description = runtime_description.description;
if (runtime_description.pixel_shader != nullptr) { if (runtime_description.pixel_shader != nullptr) {
XELOGGPU( XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
"Creating graphics pipeline state with VS {:016X}" runtime_description.vertex_shader->ucode_data_hash(),
", PS {:016X}", runtime_description.pixel_shader->ucode_data_hash());
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else { } else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}", XELOGGPU("Creating graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash()); runtime_description.vertex_shader->ucode_data_hash());
} }
@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
} }
} }
// Create the pipeline state object. // Create the D3D12 pipeline state object.
auto device = auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
ID3D12PipelineState* state; ID3D12PipelineState* state;
if (FAILED(device->CreateGraphicsPipelineState(&state_desc, if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
IID_PPV_ARGS(&state)))) { IID_PPV_ARGS(&state)))) {
if (runtime_description.pixel_shader != nullptr) { if (runtime_description.pixel_shader != nullptr) {
XELOGE( XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
"Failed to create graphics pipeline state with VS {:016X}" runtime_description.vertex_shader->ucode_data_hash(),
", PS {:016X}", runtime_description.pixel_shader->ucode_data_hash());
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else { } else {
XELOGE("Failed to create graphics pipeline state with VS {:016X}", XELOGE("Failed to create graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash()); runtime_description.vertex_shader->ucode_data_hash());
} }
return nullptr; return nullptr;
@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
ucode_guest_endian.reserve(0xFFFF); ucode_guest_endian.reserve(0xFFFF);
bool flush_shaders = false; bool flush_shaders = false;
bool flush_pipeline_states = false; bool flush_pipelines = false;
while (true) { while (true) {
if (flush_shaders) { if (flush_shaders) {
@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
assert_not_null(shader_storage_file_); assert_not_null(shader_storage_file_);
fflush(shader_storage_file_); fflush(shader_storage_file_);
} }
if (flush_pipeline_states) { if (flush_pipelines) {
flush_pipeline_states = false; flush_pipelines = false;
assert_not_null(pipeline_state_storage_file_); assert_not_null(pipeline_storage_file_);
fflush(pipeline_state_storage_file_); fflush(pipeline_storage_file_);
} }
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {}; std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
PipelineStoredDescription pipeline_description; PipelineStoredDescription pipeline_description;
bool write_pipeline_state = false; bool write_pipeline = false;
{ {
std::unique_lock<std::mutex> lock(storage_write_request_lock_); std::unique_lock<std::mutex> lock(storage_write_request_lock_);
if (storage_write_thread_shutdown_) { if (storage_write_thread_shutdown_) {
@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_shaders_ = false; storage_write_flush_shaders_ = false;
flush_shaders = true; flush_shaders = true;
} }
if (!storage_write_pipeline_state_queue_.empty()) { if (!storage_write_pipeline_queue_.empty()) {
std::memcpy(&pipeline_description, std::memcpy(&pipeline_description,
&storage_write_pipeline_state_queue_.front(), &storage_write_pipeline_queue_.front(),
sizeof(pipeline_description)); sizeof(pipeline_description));
storage_write_pipeline_state_queue_.pop_front(); storage_write_pipeline_queue_.pop_front();
write_pipeline_state = true; write_pipeline = true;
} else if (storage_write_flush_pipeline_states_) { } else if (storage_write_flush_pipelines_) {
storage_write_flush_pipeline_states_ = false; storage_write_flush_pipelines_ = false;
flush_pipeline_states = true; flush_pipelines = true;
} }
if (!shader_pair.first && !write_pipeline_state) { if (!shader_pair.first && !write_pipeline) {
storage_write_request_cond_.wait(lock); storage_write_request_cond_.wait(lock);
continue; continue;
} }
@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
} }
} }
if (write_pipeline_state) { if (write_pipeline) {
assert_not_null(pipeline_state_storage_file_); assert_not_null(pipeline_storage_file_);
fwrite(&pipeline_description, sizeof(pipeline_description), 1, fwrite(&pipeline_description, sizeof(pipeline_description), 1,
pipeline_state_storage_file_); pipeline_storage_file_);
} }
} }
} }
void PipelineCache::CreationThread(size_t thread_index) { void PipelineCache::CreationThread(size_t thread_index) {
while (true) { while (true) {
PipelineState* pipeline_state_to_create = nullptr; Pipeline* pipeline_to_create = nullptr;
// Check if need to shut down or set the completion event and dequeue the // Check if need to shut down or set the completion event and dequeue the
// pipeline state if there is any. // pipeline if there is any.
{ {
std::unique_lock<std::mutex> lock(creation_request_lock_); std::unique_lock<std::mutex> lock(creation_request_lock_);
if (thread_index >= creation_threads_shutdown_from_ || if (thread_index >= creation_threads_shutdown_from_ ||
creation_queue_.empty()) { creation_queue_.empty()) {
if (creation_completion_set_event_ && creation_threads_busy_ == 0) { if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
// Last pipeline state object in the queue created - signal the event // Last pipeline in the queue created - signal the event if requested.
// if requested.
creation_completion_set_event_ = false; creation_completion_set_event_ = false;
creation_completion_event_->Set(); creation_completion_event_->Set();
} }
@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
creation_request_cond_.wait(lock); creation_request_cond_.wait(lock);
continue; continue;
} }
// Take the pipeline state from the queue and increment the busy thread // Take the pipeline from the queue and increment the busy thread count
// count until the pipeline state object is created - other threads must // until the pipeline is created - other threads must be able to dequeue
// be able to dequeue requests, but can't set the completion event until // requests, but can't set the completion event until the pipelines are
// the pipeline state objects are fully created (rather than just started // fully created (rather than just started creating).
// creating). pipeline_to_create = creation_queue_.front();
pipeline_state_to_create = creation_queue_.front();
creation_queue_.pop_front(); creation_queue_.pop_front();
++creation_threads_busy_; ++creation_threads_busy_;
} }
// Create the D3D12 pipeline state object. // Create the D3D12 pipeline state object.
pipeline_state_to_create->state = pipeline_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description); CreateD3D12Pipeline(pipeline_to_create->description);
// Pipeline state object created - the thread is not busy anymore, safe to // Pipeline created - the thread is not busy anymore, safe to set the
// set the completion event if needed (at the next iteration, or in some // completion event if needed (at the next iteration, or in some other
// other thread). // thread).
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_; --creation_threads_busy_;
@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
} }
} }
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() { void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
assert_false(creation_threads_.empty()); assert_false(creation_threads_.empty());
while (true) { while (true) {
PipelineState* pipeline_state_to_create; Pipeline* pipeline_to_create;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) { if (creation_queue_.empty()) {
break; break;
} }
pipeline_state_to_create = creation_queue_.front(); pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front(); creation_queue_.pop_front();
} }
pipeline_state_to_create->state = pipeline_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description); CreateD3D12Pipeline(pipeline_to_create->description);
} }
} }

View File

@ -29,6 +29,7 @@
#include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -54,7 +55,7 @@ class PipelineCache {
void ShutdownShaderStorage(); void ShutdownShaderStorage();
void EndSubmission(); void EndSubmission();
bool IsCreatingPipelineStates(); bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count); const uint32_t* host_address, uint32_t dword_count);
@ -73,14 +74,12 @@ class PipelineCache {
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out, void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
ID3D12RootSignature** root_signature_out);
// Resolves an opaque pipeline handle to the D3D12 pipeline state object stored
// in the pipeline it refers to. Pipelines are created in a deferred way; the
// result may be nullptr if the creation of the pipeline has failed. The handle
// itself is dereferenced without a null check.
inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
  const Pipeline* pipeline = static_cast<const Pipeline*>(handle);
  return pipeline->state;
}
private: private:
@ -237,7 +236,7 @@ class PipelineCache {
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out); PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreateD3D12PipelineState( ID3D12PipelineState* CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description); const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor& command_processor_; D3D12CommandProcessor& command_processor_;
@ -255,9 +254,9 @@ class PipelineCache {
IDxcUtils* dxc_utils_ = nullptr; IDxcUtils* dxc_utils_ = nullptr;
IDxcCompiler* dxc_compiler_ = nullptr; IDxcCompiler* dxc_compiler_ = nullptr;
// All loaded shaders mapped by their guest hash key. // Ucode hash -> shader.
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>> std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_; shaders_;
struct LayoutUID { struct LayoutUID {
size_t uid; size_t uid;
@ -285,21 +284,20 @@ class PipelineCache {
// Xenos pixel shader provided. // Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_; std::vector<uint8_t> depth_only_pixel_shader_;
struct PipelineState { struct Pipeline {
// nullptr if creation has failed. // nullptr if creation has failed.
ID3D12PipelineState* state; ID3D12PipelineState* state;
PipelineRuntimeDescription description; PipelineRuntimeDescription description;
}; };
// All previously generated pipeline state objects identified by hash and the // All previously generated pipelines identified by hash and the description.
// description. std::unordered_multimap<uint64_t, Pipeline*,
std::unordered_multimap<uint64_t, PipelineState*,
xe::hash::IdentityHasher<uint64_t>> xe::hash::IdentityHasher<uint64_t>>
pipeline_states_; pipelines_;
// Previously used pipeline state object. This matches our current state // Previously used pipeline. This matches our current state settings and
// settings and allows us to quickly(ish) reuse the pipeline state if no // allows us to quickly(ish) reuse the pipeline if no registers have been
// registers have changed. // changed.
PipelineState* current_pipeline_state_ = nullptr; Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path. // Currently open shader storage path.
std::filesystem::path shader_storage_root_; std::filesystem::path shader_storage_root_;
@ -309,10 +307,9 @@ class PipelineCache {
FILE* shader_storage_file_ = nullptr; FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false; bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator // Pipeline storage output stream, for preload in the next emulator runs.
// runs. FILE* pipeline_storage_file_ = nullptr;
FILE* pipeline_state_storage_file_ = nullptr; bool pipeline_storage_file_flush_needed_ = false;
bool pipeline_state_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams. // Thread for asynchronous writing to the storage streams.
void StorageWriteThread(); void StorageWriteThread();
@ -322,28 +319,27 @@ class PipelineCache {
// thread is notified about its change via storage_write_request_cond_. // thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>> std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_; storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_; std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false; bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false; bool storage_write_flush_pipelines_ = false;
bool storage_write_thread_shutdown_ = false; bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_; std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads. // Pipeline creation threads.
void CreationThread(size_t thread_index); void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread(); void CreateQueuedPipelinesOnProcessorThread();
std::mutex creation_request_lock_; std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_; std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_ // Protected with creation_request_lock_, notify_one creation_request_cond_
// when set. // when set.
std::deque<PipelineState*> creation_queue_; std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline state object - // Number of threads that are currently creating a pipeline - incremented when
// incremented when a pipeline state object is dequeued (the completion event // a pipeline is dequeued (the completion event can't be triggered before this
// can't be triggered before this is zero). Protected with // is zero). Protected with creation_request_lock_.
// creation_request_lock_.
size_t creation_threads_busy_ = 0; size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is // Manual-reset event set when the last queued pipeline is created and there
// created and there are no more pipeline state objects to create. This is // are no more pipelines to create. This is triggered by the thread creating
// triggered by the thread creating the last pipeline state object. // the last pipeline.
std::unique_ptr<xe::threading::Event> creation_completion_event_; std::unique_ptr<xe::threading::Event> creation_completion_event_;
// Whether setting the event on completion is queued. Protected with // Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set. // creation_request_lock_, notify_one creation_request_cond_ when set.

View File

@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// again and again and exit. // again and again and exit.
if (!conversion_needed || converted_index_count == 0) { if (!conversion_needed || converted_index_count == 0) {
converted_indices.gpu_address = 0; converted_indices.gpu_address = 0;
converted_indices_cache_.insert( converted_indices_cache_.emplace(converted_indices.key.value,
std::make_pair(converted_indices.key.value, converted_indices)); converted_indices);
memory_regions_used_ |= memory_regions_used_bits; memory_regions_used_ |= memory_regions_used_bits;
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
: ConversionResult::kConversionNotNeeded; : ConversionResult::kConversionNotNeeded;
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// Cache and return the indices. // Cache and return the indices.
converted_indices.gpu_address = gpu_address; converted_indices.gpu_address = gpu_address;
converted_indices_cache_.insert( converted_indices_cache_.emplace(converted_indices.key.value,
std::make_pair(converted_indices.key.value, converted_indices)); converted_indices);
memory_regions_used_ |= memory_regions_used_bits; memory_regions_used_ |= memory_regions_used_bits;
gpu_address_out = gpu_address; gpu_address_out = gpu_address;
index_count_out = converted_index_count; index_count_out = converted_index_count;

View File

@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false; return false;
} }
// Create the EDRAM load/store pipeline state objects. // Create the EDRAM load/store pipelines.
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i]; const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size, device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size, device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
if (edram_load_pipelines_[i] == nullptr || if (edram_load_pipelines_[i] == nullptr ||
edram_store_pipelines_[i] == nullptr) { edram_store_pipelines_[i] == nullptr) {
XELOGE( XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
"Failed to create the EDRAM load/store pipeline states for mode {}", i);
i);
Shutdown(); Shutdown();
return false; return false;
} }
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
} }
} }
// Create the resolve root signatures and pipeline state objects. // Create the resolve root signatures and pipelines.
D3D12_ROOT_PARAMETER resolve_root_parameters[3]; D3D12_ROOT_PARAMETER resolve_root_parameters[3];
// Copying root signature. // Copying root signature.
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false; return false;
} }
// Copying pipeline state objects. // Copying pipelines.
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1; uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount); for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
++i) { ++i) {
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
continue; continue;
} }
const auto& resolve_copy_shader = resolve_copy_shaders_[i]; const auto& resolve_copy_shader = resolve_copy_shaders_[i];
ID3D12PipelineState* resolve_copy_pipeline_state = ID3D12PipelineState* resolve_copy_pipeline =
ui::d3d12::util::CreateComputePipelineState( ui::d3d12::util::CreateComputePipeline(
device, resolve_copy_shader.first, resolve_copy_shader.second, device, resolve_copy_shader.first, resolve_copy_shader.second,
resolve_copy_root_signature_); resolve_copy_root_signature_);
if (resolve_copy_pipeline_state == nullptr) { if (resolve_copy_pipeline == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline state", XELOGE("Failed to create {} resolve copy pipeline",
resolve_copy_shader_info.debug_name); resolve_copy_shader_info.debug_name);
} }
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>( resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str())); xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state; resolve_copy_pipelines_[i] = resolve_copy_pipeline;
} }
// Clearing pipeline state objects. // Clearing pipelines.
resolve_clear_32bpp_pipeline_state_ = resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
ui::d3d12::util::CreateComputePipelineState( device,
device, resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs : resolve_clear_32bpp_cs,
: resolve_clear_32bpp_cs, resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs) : sizeof(resolve_clear_32bpp_cs),
: sizeof(resolve_clear_32bpp_cs), resolve_clear_root_signature_);
resolve_clear_root_signature_); if (resolve_clear_32bpp_pipeline_ == nullptr) {
if (resolve_clear_32bpp_pipeline_state_ == nullptr) { XELOGE("Failed to create the 32bpp resolve clear pipeline");
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
Shutdown(); Shutdown();
return false; return false;
} }
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp"); resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_state_ = resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
ui::d3d12::util::CreateComputePipelineState( device,
device, resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs : resolve_clear_64bpp_cs,
: resolve_clear_64bpp_cs, resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs) : sizeof(resolve_clear_64bpp_cs),
: sizeof(resolve_clear_64bpp_cs), resolve_clear_root_signature_);
resolve_clear_root_signature_); if (resolve_clear_64bpp_pipeline_ == nullptr) {
if (resolve_clear_64bpp_pipeline_state_ == nullptr) { XELOGE("Failed to create the 64bpp resolve clear pipeline");
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
Shutdown(); Shutdown();
return false; return false;
} }
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp"); resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
// The float depth clearing pipeline is created only when ROV is not used
// (resolution scaling is asserted to be off in this case).
if (!edram_rov_used_) {
  assert_false(resolution_scale_2x_);
  resolve_clear_depth_24_32_pipeline_ = ui::d3d12::util::CreateComputePipeline(
      device, resolve_clear_depth_24_32_cs,
      sizeof(resolve_clear_depth_24_32_cs), resolve_clear_root_signature_);
  if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
    XELOGE(
        "Failed to create the 24-bit and 32-bit depth resolve clear pipeline");
    Shutdown();
    return false;
  }
  // Name the pipeline that has just been created - previously the name was
  // set on the 64bpp clear pipeline instead, overwriting its own name.
  resolve_clear_depth_24_32_pipeline_->SetName(
      L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
edram_snapshot_restore_pool_.reset(); edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_); ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_); ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) { for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]); ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
} }
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_); ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(copy_shader_constants) / sizeof(uint32_t), 0, sizeof(copy_shader_constants) / sizeof(uint32_t),
&copy_shader_constants, 0); &copy_shader_constants, 0);
} }
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
resolve_copy_pipeline_states_[size_t(copy_shader)]); resolve_copy_pipelines_[size_t(copy_shader)]);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1); command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
command_list.D3DSetComputeRoot32BitConstants( command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(depth_clear_constants) / sizeof(uint32_t), 0, sizeof(depth_clear_constants) / sizeof(uint32_t),
&depth_clear_constants, 0); &depth_clear_constants, 0);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_ clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
: resolve_clear_32bpp_pipeline_state_); : resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first, command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1); clear_group_count.second, 1);
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(color_clear_constants) / sizeof(uint32_t), 0, sizeof(color_clear_constants) / sizeof(uint32_t),
&color_clear_constants, 0); &color_clear_constants, 0);
} }
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
resolve_info.color_edram_info.format_is_64bpp resolve_info.color_edram_info.format_is_64bpp
? resolve_clear_64bpp_pipeline_state_ ? resolve_clear_64bpp_pipeline_
: resolve_clear_32bpp_pipeline_state_); : resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first, command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1); clear_group_count.second, 1);
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->footprints, nullptr, nullptr, render_target->footprints, nullptr, nullptr,
&copy_buffer_size); &copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target)); render_targets_.emplace(key.value, render_target);
COUNT_profile_set("gpu/render_target_cache/render_targets", COUNT_profile_set("gpu/render_target_cache/render_targets",
render_targets_.size()); render_targets_.size());
#if 0 #if 0
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1); command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

View File

@ -237,14 +237,13 @@ class D3D12CommandProcessor;
// get each of the 4 host pixels for each sample. // get each of the 4 host pixels for each sample.
class RenderTargetCache { class RenderTargetCache {
public: public:
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors // Direct3D 12 debug layer is giving errors that contradict each other when
// that contradict each other when you use null RTV descriptors - if you set // you use null RTV descriptors - if you set a valid format in RTVFormats in
// a valid format in RTVFormats in the pipeline state, it says that null // the pipeline state, it says that null descriptors can only be used if the
// descriptors can only be used if the format in the pipeline state is // format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains // DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// that the format in the pipeline doesn't match the RTV format. So we have to // state doesn't match the RTV format. So we have to make render target
// make render target bindings consecutive and remap the output indices in // bindings consecutive and remap the output indices in pixel shaders.
// pixel shaders.
struct PipelineRenderTarget { struct PipelineRenderTarget {
uint32_t guest_render_target; uint32_t guest_render_target;
DXGI_FORMAT format; DXGI_FORMAT format;
@ -537,7 +536,7 @@ class RenderTargetCache {
// 16: - EDRAM pitch in tiles. // 16: - EDRAM pitch in tiles.
uint32_t base_samples_2x_depth_pitch; uint32_t base_samples_2x_depth_pitch;
}; };
// EDRAM pipeline states for the RTV/DSV path. // EDRAM pipelines for the RTV/DSV path.
static const EdramLoadStoreModeInfo static const EdramLoadStoreModeInfo
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)]; edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
ID3D12PipelineState* ID3D12PipelineState*
@ -546,20 +545,20 @@ class RenderTargetCache {
ID3D12PipelineState* ID3D12PipelineState*
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {}; edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
// Resolve root signatures and pipeline state objects. // Resolve root signatures and pipelines.
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr; ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
static const std::pair<const uint8_t*, size_t> static const std::pair<const uint8_t*, size_t>
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)]; resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t( ID3D12PipelineState* resolve_copy_pipelines_[size_t(
draw_util::ResolveCopyShaderIndex::kCount)] = {}; draw_util::ResolveCopyShaderIndex::kCount)] = {};
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr; ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV. // Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
// Clearing 64bpp color. // Clearing 64bpp color.
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
// Clearing float depth without ROV, both the float24 and the host float32 // Clearing float depth without ROV, both the float24 and the host float32
// versions. // versions.
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on // FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
// Nvidia Maxwell 1st generation and older. // Nvidia Maxwell 1st generation and older.

View File

@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
return false; return false;
} }
// Create the loading pipeline state objects. // Create the loading pipelines.
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i]; const LoadModeInfo& mode_info = load_mode_info_[i];
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState( load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader, mode_info.shader_size, load_root_signature_); device, mode_info.shader, mode_info.shader_size, load_root_signature_);
if (load_pipeline_states_[i] == nullptr) { if (load_pipelines_[i] == nullptr) {
XELOGE( XELOGE("Failed to create the texture loading pipeline for mode {}", i);
"Failed to create the texture loading pipeline state object for mode "
"{}",
i);
Shutdown(); Shutdown();
return false; return false;
} }
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) { if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState( load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader_2x, mode_info.shader_2x_size, device, mode_info.shader_2x, mode_info.shader_2x_size,
load_root_signature_); load_root_signature_);
if (load_pipeline_states_2x_[i] == nullptr) { if (load_pipelines_2x_[i] == nullptr) {
XELOGE( XELOGE(
"Failed to create the 2x-scaled texture loading pipeline state " "Failed to create the 2x-scaled texture loading pipeline for mode "
"for mode {}", "{}",
i); i);
Shutdown(); Shutdown();
return false; return false;
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_); ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]); ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]); ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
} }
ui::d3d12::util::ReleaseAndNull(load_root_signature_); ui::d3d12::util::ReleaseAndNull(load_root_signature_);
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
if (IsResolutionScale2X() && key.tiled) { if (IsResolutionScale2X() && key.tiled) {
LoadMode load_mode = GetLoadMode(key); LoadMode load_mode = GetLoadMode(key);
if (load_mode != LoadMode::kUnknown && if (load_mode != LoadMode::kUnknown &&
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) { load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
uint32_t base_size = 0, mip_size = 0; uint32_t base_size = 0, mip_size = 0;
texture_util::GetTextureTotalSize( texture_util::GetTextureTotalSize(
key.dimension, key.width, key.height, key.depth, key.format, key.dimension, key.width, key.height, key.depth, key.format,
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
} }
texture->base_watch_handle = nullptr; texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr; texture->mip_watch_handle = nullptr;
textures_.insert(std::make_pair(map_key, texture)); textures_.emplace(map_key, texture);
COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size; textures_total_size_ += texture->resource_size;
COUNT_profile_set("gpu/texture_cache/total_size_mb", COUNT_profile_set("gpu/texture_cache/total_size_mb",
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false; return false;
} }
bool scaled_resolve = texture->key.scaled_resolve ? true : false; bool scaled_resolve = texture->key.scaled_resolve ? true : false;
ID3D12PipelineState* pipeline_state = ID3D12PipelineState* pipeline = scaled_resolve
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)] ? load_pipelines_2x_[uint32_t(load_mode)]
: load_pipeline_states_[uint32_t(load_mode)]; : load_pipelines_[uint32_t(load_mode)];
if (pipeline_state == nullptr) { if (pipeline == nullptr) {
return false; return false;
} }
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)]; const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_mode_info.srv_bpe_log2); load_mode_info.srv_bpe_log2);
} }
} }
command_processor_.SetComputePipelineState(pipeline_state); command_processor_.SetComputePipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_); command_list.D3DSetComputeRootSignature(load_root_signature_);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
} }
device->CreateShaderResourceView( device->CreateShaderResourceView(
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
texture.srv_descriptors.insert({descriptor_key, descriptor_index}); texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
return descriptor_index; return descriptor_index;
} }

View File

@ -550,9 +550,9 @@ class TextureCache {
static const LoadModeInfo load_mode_info_[]; static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr; ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {}; ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
// Load pipeline state objects for 2x-scaled resolved targets. // Load pipelines for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {}; ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_; std::unordered_multimap<uint64_t, Texture*> textures_;
uint64_t textures_total_size_ = 0; uint64_t textures_total_size_ = 0;

View File

@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
DxbcSrc::LU(~uint32_t(3))); DxbcSrc::LU(~uint32_t(3)));
} }
// Add the word offset from the instruction, plus the offset of the first // Add the word offset from the instruction (signed), plus the offset of the
// needed word within the element. // first needed word within the element.
uint32_t first_word_index; uint32_t first_word_index;
xe::bit_scan_forward(needed_words, &first_word_index); xe::bit_scan_forward(needed_words, &first_word_index);
int32_t first_word_buffer_offset = int32_t first_word_buffer_offset =

View File

@ -65,17 +65,17 @@ enum class InstructionStorageTarget {
// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both // disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
// skipped components and zeros, which cannot be encoded, and therefore it will // skipped components and zeros, which cannot be encoded, and therefore it will
// not). // not).
constexpr uint32_t GetInstructionStorageTargetUsedComponents( constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
InstructionStorageTarget target) { InstructionStorageTarget target) {
switch (target) { switch (target) {
case InstructionStorageTarget::kNone: case InstructionStorageTarget::kNone:
return 0b0000; return 0;
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
return 0b0111; return 3;
case InstructionStorageTarget::kDepth: case InstructionStorageTarget::kDepth:
return 0b0001; return 1;
default: default:
return 0b1111; return 4;
} }
} }
@ -136,8 +136,9 @@ struct InstructionResult {
// Returns the write mask containing only components actually present in the // Returns the write mask containing only components actually present in the
// target. // target.
uint32_t GetUsedWriteMask() const { uint32_t GetUsedWriteMask() const {
return original_write_mask & uint32_t target_component_count =
GetInstructionStorageTargetUsedComponents(storage_target); GetInstructionStorageTargetUsedComponentCount(storage_target);
return original_write_mask & ((1 << target_component_count) - 1);
} }
// True if the components are in their 'standard' swizzle arrangement (xyzw). // True if the components are in their 'standard' swizzle arrangement (xyzw).
bool IsStandardSwizzle() const { bool IsStandardSwizzle() const {
@ -161,6 +162,28 @@ struct InstructionResult {
} }
return used_components; return used_components;
} }
// Returns which components of the used write mask are constant, and what
// values they have.
uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const {
uint32_t constant_components = 0;
uint32_t constant_values = 0;
uint32_t used_write_mask = GetUsedWriteMask();
for (uint32_t i = 0; i < 4; ++i) {
if (!(used_write_mask & (1 << i))) {
continue;
}
SwizzleSource component = components[i];
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
continue;
}
constant_components |= 1 << i;
if (component == SwizzleSource::k1) {
constant_values |= 1 << i;
}
}
constant_values_out = constant_values;
return constant_components;
}
}; };
enum class InstructionStorageSource { enum class InstructionStorageSource {

View File

@ -25,6 +25,9 @@ namespace gpu {
// system page size granularity. // system page size granularity.
class SharedMemory { class SharedMemory {
public: public:
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
virtual ~SharedMemory(); virtual ~SharedMemory();
// Call in the implementation-specific ClearCache. // Call in the implementation-specific ClearCache.
virtual void ClearCache(); virtual void ClearCache();
@ -98,9 +101,6 @@ class SharedMemory {
// destructor. // destructor.
void ShutdownCommon(); void ShutdownCommon();
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
// Sparse allocations are 4 MB, so not too many of them are allocated, but // Sparse allocations are 4 MB, so not too many of them are allocated, but
// also not to waste too much memory for padding (with 16 MB there's too // also not to waste too much memory for padding (with 16 MB there's too
// much). // much).

View File

@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
return false; return false;
} }
// Create the pipeline states. // Create the pipelines.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
pipeline_state_desc.pRootSignature = root_signature_; pipeline_desc.pRootSignature = root_signature_;
pipeline_state_desc.VS.pShaderBytecode = immediate_vs; pipeline_desc.VS.pShaderBytecode = immediate_vs;
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs); pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_state_desc.PS.pShaderBytecode = immediate_ps; pipeline_desc.PS.pShaderBytecode = immediate_ps;
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps); pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc = D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
pipeline_state_desc.BlendState.RenderTarget[0]; pipeline_desc.BlendState.RenderTarget[0];
pipeline_blend_desc.BlendEnable = TRUE; pipeline_blend_desc.BlendEnable = TRUE;
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
D3D12_COLOR_WRITE_ENABLE_GREEN | D3D12_COLOR_WRITE_ENABLE_GREEN |
D3D12_COLOR_WRITE_ENABLE_BLUE; D3D12_COLOR_WRITE_ENABLE_BLUE;
pipeline_state_desc.SampleMask = UINT_MAX; pipeline_desc.SampleMask = UINT_MAX;
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE; pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE; pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {}; D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
pipeline_input_elements[0].SemanticName = "POSITION"; pipeline_input_elements[0].SemanticName = "POSITION";
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT; pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_input_elements[2].AlignedByteOffset = pipeline_input_elements[2].AlignedByteOffset =
offsetof(ImmediateVertex, color); offsetof(ImmediateVertex, color);
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements; pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_state_desc.InputLayout.NumElements = pipeline_desc.InputLayout.NumElements =
UINT(xe::countof(pipeline_input_elements)); UINT(xe::countof(pipeline_input_elements));
pipeline_state_desc.PrimitiveTopologyType = pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; pipeline_desc.NumRenderTargets = 1;
pipeline_state_desc.NumRenderTargets = 1; pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; pipeline_desc.SampleDesc.Count = 1;
pipeline_state_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) { &pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
XELOGE( XELOGE(
"Failed to create the Direct3D 12 immediate drawer triangle pipeline " "Failed to create the Direct3D 12 immediate drawer triangle pipeline "
"state"); "state");
Shutdown(); Shutdown();
return false; return false;
} }
pipeline_state_desc.PrimitiveTopologyType = pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) { &pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
XELOGE( XELOGE(
"Failed to create the Direct3D 12 immediate drawer line pipeline " "Failed to create the Direct3D 12 immediate drawer line pipeline "
"state"); "state");
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
util::ReleaseAndNull(sampler_heap_); util::ReleaseAndNull(sampler_heap_);
util::ReleaseAndNull(pipeline_state_line_); util::ReleaseAndNull(pipeline_line_);
util::ReleaseAndNull(pipeline_state_triangle_); util::ReleaseAndNull(pipeline_triangle_);
util::ReleaseAndNull(root_signature_); util::ReleaseAndNull(root_signature_);
} }
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
uint32_t(sampler_index))); uint32_t(sampler_index)));
} }
// Set the primitive type and the pipeline state for it. // Set the primitive type and the pipeline for it.
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
ID3D12PipelineState* pipeline_state; ID3D12PipelineState* pipeline;
switch (draw.primitive_type) { switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines: case ImmediatePrimitiveType::kLines:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
pipeline_state = pipeline_state_line_; pipeline = pipeline_line_;
break; break;
case ImmediatePrimitiveType::kTriangles: case ImmediatePrimitiveType::kTriangles:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
pipeline_state = pipeline_state_triangle_; pipeline = pipeline_triangle_;
break; break;
default: default:
assert_unhandled_case(draw.primitive_type); assert_unhandled_case(draw.primitive_type);
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (current_primitive_topology_ != primitive_topology) { if (current_primitive_topology_ != primitive_topology) {
current_primitive_topology_ = primitive_topology; current_primitive_topology_ = primitive_topology;
current_command_list_->IASetPrimitiveTopology(primitive_topology); current_command_list_->IASetPrimitiveTopology(primitive_topology);
current_command_list_->SetPipelineState(pipeline_state); current_command_list_->SetPipelineState(pipeline);
} }
// Draw. // Draw.

View File

@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
kCount kCount
}; };
ID3D12PipelineState* pipeline_state_triangle_ = nullptr; ID3D12PipelineState* pipeline_triangle_ = nullptr;
ID3D12PipelineState* pipeline_state_line_ = nullptr; ID3D12PipelineState* pipeline_line_ = nullptr;
ID3D12DescriptorHeap* sampler_heap_ = nullptr; ID3D12DescriptorHeap* sampler_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;

View File

@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
return root_signature; return root_signature;
} }
ID3D12PipelineState* CreateComputePipelineState( ID3D12PipelineState* CreateComputePipeline(
ID3D12Device* device, const void* shader, size_t shader_size, ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature) { ID3D12RootSignature* root_signature) {
D3D12_COMPUTE_PIPELINE_STATE_DESC desc; D3D12_COMPUTE_PIPELINE_STATE_DESC desc;

View File

@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) {
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider, ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
const D3D12_ROOT_SIGNATURE_DESC& desc); const D3D12_ROOT_SIGNATURE_DESC& desc);
ID3D12PipelineState* CreateComputePipelineState( ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
ID3D12Device* device, const void* shader, size_t shader_size, const void* shader,
ID3D12RootSignature* root_signature); size_t shader_size,
ID3D12RootSignature* root_signature);
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) { constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {
switch (element_size_bytes_log2) { switch (element_size_bytes_log2) {

View File

@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() {
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
uint64_t submission_index, size_t size, size_t alignment, uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out) { size_t& offset_out) {
assert_not_zero(alignment); alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment)); assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment); size = xe::align(size, alignment);
assert_true(size <= page_size_); assert_true(size <= page_size_);
@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial( GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial(
uint64_t submission_index, size_t size, size_t alignment, uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out, size_t& size_out) { size_t& offset_out, size_t& size_out) {
assert_not_zero(alignment); alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment)); assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment); size = xe::align(size, alignment);
size = std::min(size, page_size_); size = std::min(size, page_size_);