[D3D12] Cleanup: pipeline state -> pipeline, other things

This commit is contained in:
Triang3l 2020-11-14 16:43:18 +03:00
parent 87a3c5fac2
commit 6b988d43c7
20 changed files with 475 additions and 490 deletions

View File

@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
sampler_count_vertex);
return nullptr;
}
root_signatures_bindful_.insert({index, root_signature});
root_signatures_bindful_.emplace(index, root_signature);
return root_signature;
}
@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions(
current_sample_positions_ = sample_positions;
}
void D3D12CommandProcessor::SetComputePipelineState(
ID3D12PipelineState* pipeline_state) {
if (current_external_pipeline_state_ != pipeline_state) {
deferred_command_list_.D3DSetPipelineState(pipeline_state);
current_external_pipeline_state_ = pipeline_state;
current_cached_pipeline_state_ = nullptr;
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
if (current_external_pipeline_ != pipeline) {
deferred_command_list_.D3DSetPipelineState(pipeline);
current_external_pipeline_ = pipeline;
current_cached_pipeline_ = nullptr;
}
}
@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
}
// Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - 2x";
return "Direct3D 12 - ROV 2x";
}
// Rasterizer-ordered views are a feature that is very rarely used as of 2020
// and that faces adoption complications (at least outside of Direct3D, such
// as on Vulkan), but it is crucial to Xenia - raise awareness of its usage.
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
// feature" - oscarbg in that issue.
return "Direct3D 12 - ROV";
}
return "Direct3D 12";
}
@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() {
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache");
XELOGE("Failed to initialize the graphics pipeline cache");
return false;
}
@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() {
// Shut down binding - bindless descriptors may be owned by subsystems like
// the texture cache.
// Root signatured are used by pipeline states, thus freed after the pipeline
// states.
// Root signatures are used by pipelines, thus freed after the pipelines.
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
for (auto it : root_signatures_bindful_) {
@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
xenos::VertexShaderExportMode::kMultipass ||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline state.
// All faces are culled - can't be expressed in the pipeline.
return true;
}
@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0;
}
// Update the textures - this may bind pipeline state objects.
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
early_z = true;
}
// Create the pipeline state object if needed and bind it.
void* pipeline_state_handle;
// Create the pipeline if needed and bind it.
void* pipeline_handle;
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_state_handle,
early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) {
return false;
}
if (current_cached_pipeline_state_ != pipeline_state_handle) {
if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_state_handle));
current_cached_pipeline_state_ = pipeline_state_handle;
current_external_pipeline_state_ = nullptr;
reinterpret_cast<void*>(pipeline_handle));
current_cached_pipeline_ = pipeline_handle;
current_external_pipeline_ = nullptr;
}
// Update viewport, scissor, blend factor and stencil reference.
@ -2005,14 +2011,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
}
// Must not call anything that can change the descriptor heap from now on!
// Ensure vertex and index buffers are resident and draw.
// Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity will be tracked.
// validity is tracked.
uint64_t vertex_buffers_resident[2] = {};
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
for (const Shader::VertexBinding& vertex_binding :
vertex_shader->vertex_bindings()) {
uint32_t vfetch_index = vertex_binding.fetch_constant;
if (vertex_buffers_resident[vfetch_index >> 6] &
(1ull << (vfetch_index & 63))) {
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
@ -2045,7 +2052,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
}
// Gather memexport ranges and ensure the heaps for them are resident, and
@ -2517,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true;
// Start a new deferred command list - will submit it to the real one in the
// end of the submission (when async pipeline state object creation requests
// are fulfilled).
// end of the submission (when async pipeline creation requests are
// fulfilled).
deferred_command_list_.Reset();
// Reset cached state of the command list.
@ -2527,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
ff_blend_factor_update_needed_ = true;
ff_stencil_ref_update_needed_ = true;
current_sample_positions_ = xenos::MsaaSamples::k1X;
current_cached_pipeline_state_ = nullptr;
current_external_pipeline_state_ = nullptr;
current_cached_pipeline_ = nullptr;
current_external_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0;
if (bindless_resources_used_) {
@ -2724,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
}
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
}
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
@ -2745,12 +2753,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
}
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow:
@ -2846,14 +2854,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
scissor.right = pa_sc_window_scissor_br.br_x;
scissor.bottom = pa_sc_window_scissor_br.br_y;
if (!pa_sc_window_scissor_tl.window_offset_disable) {
scissor.left =
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.top =
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.right =
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.bottom =
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.left = std::max(
LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.top = std::max(
LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
scissor.right = std::max(
LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.bottom = std::max(
LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
}
scissor.left *= pixel_size_x;
scissor.top *= pixel_size_y;
@ -2915,12 +2923,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t line_loop_closing_index, xenos::Endian index_endian,
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
@ -3103,14 +3110,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
system_constants_.line_loop_closing_index = line_loop_closing_index;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!pa_cl_clip_cntl.clip_disable) {
for (uint32_t i = 0; i < 6; ++i) {
@ -3574,7 +3581,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_vertex.float_bitmap[i];
// If no float constants at all, we can reuse any buffer for them, so not
// invalidating.
if (float_constant_map_vertex.float_count != 0) {
if (float_constant_count_vertex) {
cbuffer_binding_float_vertex_.up_to_date = false;
}
}
@ -3589,7 +3596,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_pixel.float_bitmap[i]) {
current_float_constant_map_pixel_[i] =
float_constant_map_pixel.float_bitmap[i];
if (float_constant_map_pixel.float_count != 0) {
if (float_constant_count_pixel) {
cbuffer_binding_float_pixel_.up_to_date = false;
}
}
@ -3889,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_vertex_[j] = sampler_index;
}
@ -3921,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_pixel_[j] = sampler_index;
}

View File

@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets.
void SetSamplePositions(xenos::MsaaSamples sample_positions);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
return pipeline_cache_->GetD3D12PipelineByHandle(handle);
}
// Sets the current pipeline state to a compute one. This is for cache
// invalidation primarily. A submission must be open.
void SetComputePipelineState(ID3D12PipelineState* pipeline_state);
// Sets the current pipeline to a compute one. This is for cache invalidation
// primarily. A submission must be open.
void SetComputePipeline(ID3D12PipelineState* pipeline);
// For the pipeline state cache to call when binding layout UIDs may be
// reused.
// For the pipeline cache to call when binding layout UIDs may be reused.
void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title.
@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndSubmission(bool is_swap);
// Checks if ending a submission right now would not cause potentially more
// delay than it would reduce by making the GPU start working earlier - such
// as when there are unfinished graphics pipeline state creation requests that
// would need to be fulfilled before actually submitting the command list.
// as when there are unfinished graphics pipeline creation requests that would
// need to be fulfilled before actually submitting the command list.
bool CanEndSubmissionImmediately() const;
bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(submission_current_);
@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Current SSAA sample positions (to be updated by the render target cache).
xenos::MsaaSamples current_sample_positions_;
// Currently bound pipeline state, either a graphics pipeline state object
// from the pipeline state cache (with potentially deferred creation -
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos
// graphics or compute pipeline state object (current_cached_pipeline_state_
// is nullptr in this case).
void* current_cached_pipeline_state_;
ID3D12PipelineState* current_external_pipeline_state_;
// Currently bound pipeline, either a graphics pipeline from the pipeline
// cache (with potentially deferred creation - current_external_pipeline_ is
// nullptr in this case) or a non-Xenos graphics or compute pipeline
// (current_cached_pipeline_ is nullptr in this case).
void* current_cached_pipeline_;
ID3D12PipelineState* current_external_pipeline_;
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;

View File

@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
stretch_pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
XELOGE("Failed to create the front buffer stretch pipeline state");
XELOGE("Failed to create the front buffer stretch pipeline");
stretch_gamma_root_signature_->Release();
stretch_gamma_root_signature_ = nullptr;
stretch_root_signature_->Release();
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
XELOGE(
"Failed to create the gamma-correcting front buffer stretch "
"pipeline state");
"Failed to create the gamma-correcting front buffer stretch pipeline");
stretch_pipeline_->Release();
stretch_pipeline_ = nullptr;
stretch_gamma_root_signature_->Release();

View File

@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data();
}
// For owning subsystems like the pipeline state cache, accessors for unique
// For owning subsystems like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.

View File

@ -209,8 +209,7 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
}
} break;
case Command::kSetPipelineStateHandle: {
current_pipeline_state =
command_processor_.GetD3D12PipelineStateByHandle(
current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
*reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state);

View File

@ -43,10 +43,10 @@ DEFINE_bool(
"D3D12");
DEFINE_int32(
d3d12_pipeline_creation_threads, -1,
"Number of threads used for graphics pipeline state object creation. -1 to "
"calculate automatically (75% of logical CPU cores), a positive number to "
"specify the number of threads explicitly (up to the number of logical CPU "
"cores), 0 to disable multithreaded pipeline state object creation.",
"Number of threads used for graphics pipeline creation. -1 to calculate "
"automatically (75% of logical CPU cores), a positive number to specify "
"the number of threads explicitly (up to the number of logical CPU cores), "
"0 to disable multithreaded pipeline creation.",
"D3D12");
DEFINE_bool(d3d12_tessellation_wireframe, false,
"Display tessellated surfaces as wireframe for debugging.",
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
logical_processor_count = 6;
}
// Initialize creation thread synchronization data even if not using creation
// threads because they may be used anyway to create pipeline state objects
// from the storage.
// threads because they may be used anyway to create pipelines from the
// storage.
creation_threads_busy_ = 0;
creation_completion_event_ =
xe::threading::Event::CreateManualResetEvent(true);
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
for (size_t i = 0; i < creation_thread_count; ++i) {
std::unique_ptr<xe::threading::Thread> creation_thread =
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
creation_thread->set_name("D3D12 Pipeline States");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
}
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
ShutdownShaderStorage();
// Remove references to the current pipeline state object.
current_pipeline_state_ = nullptr;
// Remove references to the current pipeline.
current_pipeline_ = nullptr;
if (!creation_threads_.empty()) {
// Empty the pipeline state object creation queue and make sure there are no
// threads currently creating pipeline state objects because pipeline states
// are going to be deleted.
// Empty the pipeline creation queue and make sure there are no threads
// currently creating pipelines because pipelines are going to be deleted.
bool await_creation_completion_event = false;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
}
// Destroy all pipeline state objects.
for (auto it : pipeline_states_) {
// Destroy all pipelines.
for (auto it : pipelines_) {
it.second->state->Release();
delete it.second;
}
pipeline_states_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all shaders.
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
texture_binding_layout_map_.clear();
texture_binding_layouts_.clear();
for (auto it : shader_map_) {
for (auto it : shaders_) {
delete it.second;
}
shader_map_.clear();
shaders_.clear();
if (reinitialize_shader_storage) {
InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage(
}
size_t ucode_byte_count =
shader_header.ucode_dword_count * sizeof(uint32_t);
if (shader_map_.find(shader_header.ucode_data_hash) !=
shader_map_.end()) {
if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
// Already added - usually shaders aren't added without the intention of
// translating them imminently, so don't do additional checks to
// actually ensure that translation happens right now (they would cause
@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count);
shader_map_.insert({ucode_data_hash, shader});
shaders_.emplace(ucode_data_hash, shader);
// Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus
// one.
@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage(
}
shader_translation_threads.clear();
for (D3D12Shader* shader : shaders_failed_to_translate) {
shader_map_.erase(shader->ucode_data_hash());
shaders_.erase(shader->ucode_data_hash());
delete shader;
}
}
@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
}
// 'DXRO' or 'DXRT'.
const uint32_t pipeline_state_storage_magic_api =
const uint32_t pipeline_storage_magic_api =
edram_rov_used_ ? 0x4F525844 : 0x54525844;
// Initialize the pipeline state storage stream.
uint64_t pipeline_state_storage_initialization_start_ =
// Initialize the pipeline storage stream.
uint64_t pipeline_storage_initialization_start_ =
xe::Clock::QueryHostTickCount();
auto pipeline_state_storage_file_path =
auto pipeline_storage_file_path =
shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
edram_rov_used_ ? "rov" : "rtv");
pipeline_state_storage_file_ =
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b");
if (!pipeline_state_storage_file_) {
pipeline_storage_file_ =
xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
if (!pipeline_storage_file_) {
XELOGE(
"Failed to open the Direct3D 12 pipeline state description storage "
"file for writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_state_storage_file_path));
"Failed to open the Direct3D 12 pipeline description storage file for "
"writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_storage_file_path));
fclose(shader_storage_file_);
shader_storage_file_ = nullptr;
return;
}
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
// 'XEPS'.
const uint32_t pipeline_state_storage_magic = 0x53504558;
const uint32_t pipeline_storage_magic = 0x53504558;
struct {
uint32_t magic;
uint32_t magic_api;
uint32_t version_swapped;
} pipeline_state_storage_file_header;
if (fread(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_) &&
pipeline_state_storage_file_header.magic ==
pipeline_state_storage_magic &&
pipeline_state_storage_file_header.magic_api ==
pipeline_state_storage_magic_api &&
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
} pipeline_storage_file_header;
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_) &&
pipeline_storage_file_header.magic == pipeline_storage_magic &&
pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
PipelineDescription::kVersion) {
uint64_t pipeline_state_storage_valid_bytes =
sizeof(pipeline_state_storage_file_header);
// Enqueue pipeline state descriptions written by previous Xenia executions
// until the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END);
int64_t pipeline_state_storage_told_end =
xe::filesystem::Tell(pipeline_state_storage_file_);
size_t pipeline_state_storage_told_count =
size_t(pipeline_state_storage_told_end >=
int64_t(pipeline_state_storage_valid_bytes)
? (uint64_t(pipeline_state_storage_told_end) -
pipeline_state_storage_valid_bytes) /
uint64_t pipeline_storage_valid_bytes =
sizeof(pipeline_storage_file_header);
// Enqueue pipeline descriptions written by previous Xenia executions until
// the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
int64_t pipeline_storage_told_end =
xe::filesystem::Tell(pipeline_storage_file_);
size_t pipeline_storage_told_count = size_t(
pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
? (uint64_t(pipeline_storage_told_end) -
pipeline_storage_valid_bytes) /
sizeof(PipelineStoredDescription)
: 0);
if (pipeline_state_storage_told_count &&
xe::filesystem::Seek(pipeline_state_storage_file_,
int64_t(pipeline_state_storage_valid_bytes),
SEEK_SET)) {
if (pipeline_storage_told_count &&
xe::filesystem::Seek(pipeline_storage_file_,
int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count);
pipeline_stored_descriptions.resize(fread(
pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count,
pipeline_state_storage_file_));
pipeline_stored_descriptions.resize(pipeline_storage_told_count);
pipeline_stored_descriptions.resize(
fread(pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_storage_told_count,
pipeline_storage_file_));
if (!pipeline_stored_descriptions.empty()) {
// Launch additional creation threads to use all cores to create
// pipeline state objects faster. Will also be using the main thread, so
// minus 1.
// pipelines faster. Will also be using the main thread, so minus 1.
size_t creation_thread_original_count = creation_threads_.size();
size_t creation_thread_needed_count =
std::max(std::min(pipeline_stored_descriptions.size(),
@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
{}, [this, creation_thread_index]() {
CreationThread(creation_thread_index);
});
creation_thread->set_name("D3D12 Pipeline States Additional");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
size_t pipeline_states_created = 0;
size_t pipelines_created = 0;
for (const PipelineStoredDescription& pipeline_stored_description :
pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description =
@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage(
0) != pipeline_stored_description.description_hash) {
break;
}
pipeline_state_storage_valid_bytes +=
sizeof(PipelineStoredDescription);
// Skip already known pipeline states - those have already been
// enqueued.
auto found_range = pipeline_states_.equal_range(
pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
// Skip already known pipelines - those have already been enqueued.
auto found_range = pipelines_.equal_range(
pipeline_stored_description.description_hash);
bool pipeline_state_found = false;
bool pipeline_found = false;
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description,
&pipeline_description,
sizeof(pipeline_description))) {
pipeline_state_found = true;
pipeline_found = true;
break;
}
}
if (pipeline_state_found) {
if (pipeline_found) {
continue;
}
PipelineRuntimeDescription pipeline_runtime_description;
auto vertex_shader_it =
shader_map_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage(
}
if (pipeline_description.pixel_shader_hash) {
auto pixel_shader_it =
shader_map_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
std::memcpy(&pipeline_runtime_description.description,
&pipeline_description, sizeof(pipeline_description));
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description,
&pipeline_runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
sizeof(pipeline_runtime_description));
pipeline_states_.insert(
std::make_pair(pipeline_stored_description.description_hash,
new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(pipeline_stored_description.description_hash,
new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state =
CreateD3D12PipelineState(pipeline_runtime_description);
new_pipeline->state =
CreateD3D12Pipeline(pipeline_runtime_description);
}
++pipeline_states_created;
++pipelines_created;
}
CreateQueuedPipelineStatesOnProcessorThread();
CreateQueuedPipelinesOnProcessorThread();
if (creation_threads_.size() > creation_thread_original_count) {
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_threads_shutdown_from_ = creation_thread_original_count;
// Assuming the queue is empty because of
// CreateQueuedPipelineStatesOnProcessorThread.
// CreateQueuedPipelinesOnProcessorThread.
}
creation_request_cond_.notify_all();
while (creation_threads_.size() > creation_thread_original_count) {
@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
}
}
XELOGGPU(
"Created {} graphics pipeline state objects from the storage in {} "
"milliseconds",
pipeline_states_created,
"Created {} graphics pipelines from the storage in {} milliseconds",
pipelines_created,
(xe::Clock::QueryHostTickCount() -
pipeline_state_storage_initialization_start_) *
pipeline_storage_initialization_start_) *
1000 / xe::Clock::QueryHostTickFrequency());
}
}
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_,
pipeline_state_storage_valid_bytes);
xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
pipeline_storage_valid_bytes);
} else {
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0);
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic;
pipeline_state_storage_file_header.magic_api =
pipeline_state_storage_magic_api;
pipeline_state_storage_file_header.version_swapped =
xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
pipeline_storage_file_header.magic = pipeline_storage_magic;
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_storage_file_header.version_swapped =
xe::byte_swap(PipelineDescription::kVersion);
fwrite(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_);
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_);
}
shader_storage_root_ = storage_root;
@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
// Start the storage writing thread.
storage_write_flush_shaders_ = false;
storage_write_flush_pipeline_states_ = false;
storage_write_flush_pipelines_ = false;
storage_write_thread_shutdown_ = false;
storage_write_thread_ =
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
storage_write_thread_.reset();
}
storage_write_shader_queue_.clear();
storage_write_pipeline_state_queue_.clear();
storage_write_pipeline_queue_.clear();
if (pipeline_state_storage_file_) {
fclose(pipeline_state_storage_file_);
pipeline_state_storage_file_ = nullptr;
pipeline_state_storage_file_flush_needed_ = false;
if (pipeline_storage_file_) {
fclose(pipeline_storage_file_);
pipeline_storage_file_ = nullptr;
pipeline_storage_file_flush_needed_ = false;
}
if (shader_storage_file_) {
@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) {
pipeline_storage_file_flush_needed_) {
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true;
}
if (pipeline_state_storage_file_flush_needed_) {
storage_write_flush_pipeline_states_ = true;
if (pipeline_storage_file_flush_needed_) {
storage_write_flush_pipelines_ = true;
}
}
storage_write_request_cond_.notify_one();
shader_storage_file_flush_needed_ = false;
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
}
if (!creation_threads_.empty()) {
CreateQueuedPipelineStatesOnProcessorThread();
// Await creation of all queued pipeline state objects.
CreateQueuedPipelinesOnProcessorThread();
// Await creation of all queued pipelines.
bool await_creation_completion_event;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
// Assuming the creation queue is already empty (because the processor
// thread also worked on creating the leftover pipeline state objects), so
// only check if there are threads with pipeline state objects currently
// being created.
// thread also worked on creating the leftover pipelines), so only check
// if there are threads with pipelines currently being created.
await_creation_completion_event = creation_threads_busy_ != 0;
if (await_creation_completion_event) {
creation_completion_event_->Reset();
@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() {
}
}
bool PipelineCache::IsCreatingPipelineStates() {
bool PipelineCache::IsCreatingPipelines() {
if (creation_threads_.empty()) {
return false;
}
@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(data_hash);
if (it != shader_map_.end()) {
auto it = shaders_.find(data_hash);
if (it != shaders_.end()) {
// Shader has been previously loaded.
return it->second;
}
@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again.
D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shader_map_.insert({data_hash, shader});
shaders_.emplace(data_hash, shader);
return shader;
}
@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
const {
// If the values this function returns are changed, INVALIDATE THE SHADER
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The
// exception is when the function originally returned "unsupported", but
// started to return a valid value (in this case the shader wouldn't be cached
// in the first place). Otherwise games will not be able to locate shaders for
// draws for which the host vertex shader type has changed!
// STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
// is when the function originally returned "unsupported", but started to
// return a valid value (in this case the shader wouldn't be cached in the
// first place). Otherwise games will not be able to locate shaders for draws
// for which the host vertex shader type has changed!
const auto& regs = register_file_;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out) {
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_state_handle_out);
assert_not_null(pipeline_handle_out);
assert_not_null(root_signature_out);
PipelineRuntimeDescription runtime_description;
@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
}
PipelineDescription& description = runtime_description.description;
if (current_pipeline_state_ != nullptr &&
!std::memcmp(&current_pipeline_state_->description.description,
&description, sizeof(description))) {
*pipeline_state_handle_out = current_pipeline_state_;
if (current_pipeline_ != nullptr &&
!std::memcmp(&current_pipeline_->description.description, &description,
sizeof(description))) {
*pipeline_handle_out = current_pipeline_;
*root_signature_out = runtime_description.root_signature;
return true;
}
// Find an existing pipeline state object in the cache.
// Find an existing pipeline in the cache.
uint64_t hash = XXH64(&description, sizeof(description), 0);
auto found_range = pipeline_states_.equal_range(hash);
auto found_range = pipelines_.equal_range(hash);
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
&description, sizeof(description))) {
current_pipeline_state_ = found_pipeline_state;
*pipeline_state_handle_out = found_pipeline_state;
*root_signature_out = found_pipeline_state->description.root_signature;
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description, &description,
sizeof(description))) {
current_pipeline_ = found_pipeline;
*pipeline_handle_out = found_pipeline;
*root_signature_out = found_pipeline->description.root_signature;
return true;
}
}
@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
return false;
}
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description, &runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &runtime_description,
sizeof(runtime_description));
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(hash, new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline state object for creation to any available thread.
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description);
new_pipeline->state = CreateD3D12Pipeline(runtime_description);
}
if (pipeline_state_storage_file_) {
if (pipeline_storage_file_) {
assert_not_null(storage_write_thread_);
pipeline_state_storage_file_flush_needed_ = true;
pipeline_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_pipeline_state_queue_.emplace_back();
storage_write_pipeline_queue_.emplace_back();
PipelineStoredDescription& stored_description =
storage_write_pipeline_state_queue_.back();
storage_write_pipeline_queue_.back();
stored_description.description_hash = hash;
std::memcpy(&stored_description.description, &description,
sizeof(description));
@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
storage_write_request_cond_.notify_all();
}
current_pipeline_state_ = new_pipeline_state;
*pipeline_state_handle_out = new_pipeline_state;
current_pipeline_ = new_pipeline;
*pipeline_handle_out = new_pipeline;
*root_signature_out = runtime_description.root_signature;
return true;
}
@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader(
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert(
{texture_binding_layout_hash, new_uid});
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
new_uid);
}
}
if (bindless_sampler_count) {
@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader(
vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index;
}
bindless_sampler_layout_map_.insert(
{bindless_sampler_layout_hash, new_uid});
bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
new_uid);
}
}
}
@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
};
// Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipeline state objects aren't created in Prey because a color mode is
// used for alpha.
// pipelines aren't created in Prey because a color mode is used for alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
return true;
}
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description) {
const PipelineDescription& description = runtime_description.description;
if (runtime_description.pixel_shader != nullptr) {
XELOGGPU(
"Creating graphics pipeline state with VS {:016X}"
", PS {:016X}",
XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
XELOGGPU("Creating graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
}
}
// Create the pipeline state object.
// Create the D3D12 pipeline state object.
auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
ID3D12PipelineState* state;
if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
IID_PPV_ARGS(&state)))) {
if (runtime_description.pixel_shader != nullptr) {
XELOGE(
"Failed to create graphics pipeline state with VS {:016X}"
", PS {:016X}",
XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGE("Failed to create graphics pipeline state with VS {:016X}",
XELOGE("Failed to create graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
return nullptr;
@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
ucode_guest_endian.reserve(0xFFFF);
bool flush_shaders = false;
bool flush_pipeline_states = false;
bool flush_pipelines = false;
while (true) {
if (flush_shaders) {
@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
assert_not_null(shader_storage_file_);
fflush(shader_storage_file_);
}
if (flush_pipeline_states) {
flush_pipeline_states = false;
assert_not_null(pipeline_state_storage_file_);
fflush(pipeline_state_storage_file_);
if (flush_pipelines) {
flush_pipelines = false;
assert_not_null(pipeline_storage_file_);
fflush(pipeline_storage_file_);
}
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
PipelineStoredDescription pipeline_description;
bool write_pipeline_state = false;
bool write_pipeline = false;
{
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
if (storage_write_thread_shutdown_) {
@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_shaders_ = false;
flush_shaders = true;
}
if (!storage_write_pipeline_state_queue_.empty()) {
if (!storage_write_pipeline_queue_.empty()) {
std::memcpy(&pipeline_description,
&storage_write_pipeline_state_queue_.front(),
&storage_write_pipeline_queue_.front(),
sizeof(pipeline_description));
storage_write_pipeline_state_queue_.pop_front();
write_pipeline_state = true;
} else if (storage_write_flush_pipeline_states_) {
storage_write_flush_pipeline_states_ = false;
flush_pipeline_states = true;
storage_write_pipeline_queue_.pop_front();
write_pipeline = true;
} else if (storage_write_flush_pipelines_) {
storage_write_flush_pipelines_ = false;
flush_pipelines = true;
}
if (!shader_pair.first && !write_pipeline_state) {
if (!shader_pair.first && !write_pipeline) {
storage_write_request_cond_.wait(lock);
continue;
}
@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
}
}
if (write_pipeline_state) {
assert_not_null(pipeline_state_storage_file_);
if (write_pipeline) {
assert_not_null(pipeline_storage_file_);
fwrite(&pipeline_description, sizeof(pipeline_description), 1,
pipeline_state_storage_file_);
pipeline_storage_file_);
}
}
}
void PipelineCache::CreationThread(size_t thread_index) {
while (true) {
PipelineState* pipeline_state_to_create = nullptr;
Pipeline* pipeline_to_create = nullptr;
// Check if need to shut down or set the completion event and dequeue the
// pipeline state if there is any.
// pipeline if there is any.
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
if (thread_index >= creation_threads_shutdown_from_ ||
creation_queue_.empty()) {
if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
// Last pipeline state object in the queue created - signal the event
// if requested.
// Last pipeline in the queue created - signal the event if requested.
creation_completion_set_event_ = false;
creation_completion_event_->Set();
}
@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
creation_request_cond_.wait(lock);
continue;
}
// Take the pipeline state from the queue and increment the busy thread
// count until the pipeline state object is created - other threads must
// be able to dequeue requests, but can't set the completion event until
// the pipeline state objects are fully created (rather than just started
// creating).
pipeline_state_to_create = creation_queue_.front();
// Take the pipeline from the queue and increment the busy thread count
// until the pipeline is created - other threads must be able to dequeue
// requests, but can't set the completion event until the pipelines are
// fully created (rather than just started creating).
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
++creation_threads_busy_;
}
// Create the D3D12 pipeline state object.
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
// Pipeline state object created - the thread is not busy anymore, safe to
// set the completion event if needed (at the next iteration, or in some
// other thread).
// Pipeline created - the thread is not busy anymore, safe to set the
// completion event if needed (at the next iteration, or in some other
// thread).
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_;
@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
}
}
// Drains creation_queue_ on the calling thread: each iteration pops the front
// entry while holding creation_request_lock_, then creates its D3D12 pipeline
// state object with the lock released. Returns once the queue is observed
// empty. Only valid when creation threads exist (asserted below).
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
assert_false(creation_threads_.empty());
while (true) {
PipelineState* pipeline_state_to_create;
Pipeline* pipeline_to_create;
{
// Hold the lock only for the dequeue so the creation threads can keep
// taking other entries while this thread is busy creating.
std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) {
break;
}
pipeline_state_to_create = creation_queue_.front();
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
}
// Unlike CreationThread, creation_threads_busy_ is not modified here.
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
}
}

View File

@ -29,6 +29,7 @@
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace gpu {
@ -54,7 +55,7 @@ class PipelineCache {
void ShutdownShaderStorage();
void EndSubmission();
bool IsCreatingPipelineStates();
bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
@ -73,14 +74,12 @@ class PipelineCache {
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out);
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return reinterpret_cast<const PipelineState*>(handle)->state;
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
// The handle is the opaque pointer ConfigurePipeline writes to
// pipeline_handle_out (a Pipeline*). It is dereferenced without a check, so it
// must not be null.
inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
return reinterpret_cast<const Pipeline*>(handle)->state;
}
private:
@ -237,7 +236,7 @@ class PipelineCache {
const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreateD3D12PipelineState(
ID3D12PipelineState* CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor& command_processor_;
@ -255,9 +254,9 @@ class PipelineCache {
IDxcUtils* dxc_utils_ = nullptr;
IDxcCompiler* dxc_compiler_ = nullptr;
// All loaded shaders mapped by their guest hash key.
// Ucode hash -> shader.
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_;
shaders_;
struct LayoutUID {
size_t uid;
@ -285,21 +284,20 @@ class PipelineCache {
// Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_;
// Pipeline cache entry: the D3D12 pipeline state object together with the
// runtime description it is created from.
struct PipelineState {
struct Pipeline {
// nullptr if creation has failed.
// Also initialized to nullptr by ConfigurePipeline before the entry is
// queued for creation on a creation thread.
ID3D12PipelineState* state;
// Copied from the runtime description built in ConfigurePipeline; passed to
// the D3D12 pipeline creation function.
PipelineRuntimeDescription description;
};
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*,
// All previously generated pipelines identified by hash and the description.
std::unordered_multimap<uint64_t, Pipeline*,
xe::hash::IdentityHasher<uint64_t>>
pipeline_states_;
pipelines_;
// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no
// registers have changed.
PipelineState* current_pipeline_state_ = nullptr;
// Previously used pipeline. This matches our current state settings and
// allows us to quickly(ish) reuse the pipeline if no registers have been
// changed.
Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path.
std::filesystem::path shader_storage_root_;
@ -309,10 +307,9 @@ class PipelineCache {
FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator
// runs.
FILE* pipeline_state_storage_file_ = nullptr;
bool pipeline_state_storage_file_flush_needed_ = false;
// Pipeline storage output stream, for preload in the next emulator runs.
FILE* pipeline_storage_file_ = nullptr;
bool pipeline_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams.
void StorageWriteThread();
@ -322,28 +319,27 @@ class PipelineCache {
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false;
bool storage_write_flush_pipelines_ = false;
bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads.
// Pipeline creation threads.
void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread();
void CreateQueuedPipelinesOnProcessorThread();
std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_
// when set.
std::deque<PipelineState*> creation_queue_;
// Number of threads that are currently creating a pipeline state object -
// incremented when a pipeline state object is dequeued (the completion event
// can't be triggered before this is zero). Protected with
// creation_request_lock_.
std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline - incremented when
// a pipeline is dequeued (the completion event can't be triggered before this
// is zero). Protected with creation_request_lock_.
size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is
// created and there are no more pipeline state objects to create. This is
// triggered by the thread creating the last pipeline state object.
// Manual-reset event set when the last queued pipeline is created and there
// are no more pipelines to create. This is triggered by the thread creating
// the last pipeline.
std::unique_ptr<xe::threading::Event> creation_completion_event_;
// Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set.

View File

@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// again and again and exit.
if (!conversion_needed || converted_index_count == 0) {
converted_indices.gpu_address = 0;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
: ConversionResult::kConversionNotNeeded;
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// Cache and return the indices.
converted_indices.gpu_address = gpu_address;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
gpu_address_out = gpu_address;
index_count_out = converted_index_count;

View File

@ -277,19 +277,18 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Create the EDRAM load/store pipeline state objects.
// Create the EDRAM load/store pipelines.
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_);
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_);
if (edram_load_pipelines_[i] == nullptr ||
edram_store_pipelines_[i] == nullptr) {
XELOGE(
"Failed to create the EDRAM load/store pipeline states for mode {}",
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
i);
Shutdown();
return false;
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
}
}
// Create the resolve root signatures and pipeline state objects.
// Create the resolve root signatures and pipelines.
D3D12_ROOT_PARAMETER resolve_root_parameters[3];
// Copying root signature.
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Copying pipeline state objects.
// Copying pipelines.
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
++i) {
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
continue;
}
// Create the compute pipeline for this resolve copy shader (bytecode pointer
// and size pair) with the resolve copy root signature.
const auto& resolve_copy_shader = resolve_copy_shaders_[i];
ID3D12PipelineState* resolve_copy_pipeline_state =
ui::d3d12::util::CreateComputePipelineState(
ID3D12PipelineState* resolve_copy_pipeline =
ui::d3d12::util::CreateComputePipeline(
device, resolve_copy_shader.first, resolve_copy_shader.second,
resolve_copy_root_signature_);
if (resolve_copy_pipeline_state == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline state",
if (resolve_copy_pipeline == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline",
resolve_copy_shader_info.debug_name);
// Fail initialization like the other pipeline creation failures in this
// function - falling through would call SetName through a null pointer.
Shutdown();
return false;
}
// Label the pipeline with the shader's debug name for graphics debuggers.
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>(
resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state;
resolve_copy_pipelines_[i] = resolve_copy_pipeline;
}
// Clearing pipeline state objects.
resolve_clear_32bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
// Clearing pipelines.
resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
if (resolve_clear_32bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
if (resolve_clear_64bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp");
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
// The 24-bit and 32-bit depth resolve clear pipeline is only created when the
// ROV-based EDRAM path is not used; resolution scaling is not expected here.
if (!edram_rov_used_) {
assert_false(resolution_scale_2x_);
resolve_clear_depth_24_32_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
resolve_clear_depth_24_32_pipeline_ =
ui::d3d12::util::CreateComputePipeline(
device, resolve_clear_depth_24_32_cs,
sizeof(resolve_clear_depth_24_32_cs),
resolve_clear_root_signature_);
if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) {
if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
XELOGE(
"Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
"state");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(
// Name the depth clear pipeline that was just created. Naming the 64bpp
// pipeline here would overwrite its "Resolve Clear 64bpp" label and leave the
// depth pipeline unnamed.
resolve_clear_depth_24_32_pipeline_->SetName(
L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]);
for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(copy_shader_constants) / sizeof(uint32_t),
&copy_shader_constants, 0);
}
command_processor_.SetComputePipelineState(
resolve_copy_pipeline_states_[size_t(copy_shader)]);
command_processor_.SetComputePipeline(
resolve_copy_pipelines_[size_t(copy_shader)]);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(depth_clear_constants) / sizeof(uint32_t),
&depth_clear_constants, 0);
command_processor_.SetComputePipelineState(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
command_processor_.SetComputePipeline(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(color_clear_constants) / sizeof(uint32_t),
&color_clear_constants, 0);
}
command_processor_.SetComputePipelineState(
command_processor_.SetComputePipeline(
resolve_info.color_edram_info.format_is_64bpp
? resolve_clear_64bpp_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
? resolve_clear_64bpp_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->footprints, nullptr, nullptr,
&copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target));
render_targets_.emplace(key.value, render_target);
COUNT_profile_set("gpu/render_target_cache/render_targets",
render_targets_.size());
#if 0
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_store_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_load_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

View File

@ -237,14 +237,13 @@ class D3D12CommandProcessor;
// get each of the 4 host pixels for each sample.
class RenderTargetCache {
public:
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
// that contradict each other when you use null RTV descriptors - if you set
// a valid format in RTVFormats in the pipeline state, it says that null
// descriptors can only be used if the format in the pipeline state is
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
// that the format in the pipeline doesn't match the RTV format. So we have to
// make render target bindings consecutive and remap the output indices in
// pixel shaders.
// The Direct3D 12 debug layer gives errors that contradict each other when
// null RTV descriptors are used - if a valid format is set in RTVFormats in
// the pipeline state, it says that null descriptors can only be used if the
// format in the pipeline state is DXGI_FORMAT_UNKNOWN; however, if
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// state doesn't match the RTV format. So we have to make render target
// bindings consecutive and remap the output indices in pixel shaders.
struct PipelineRenderTarget {
uint32_t guest_render_target;
DXGI_FORMAT format;
@ -537,7 +536,7 @@ class RenderTargetCache {
// 16: - EDRAM pitch in tiles.
uint32_t base_samples_2x_depth_pitch;
};
// EDRAM pipeline states for the RTV/DSV path.
// EDRAM pipelines for the RTV/DSV path.
static const EdramLoadStoreModeInfo
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
ID3D12PipelineState*
@ -546,20 +545,20 @@ class RenderTargetCache {
ID3D12PipelineState*
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
// Resolve root signatures and pipeline state objects.
// Resolve root signatures and pipelines.
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
static const std::pair<const uint8_t*, size_t>
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t(
ID3D12PipelineState* resolve_copy_pipelines_[size_t(
draw_util::ResolveCopyShaderIndex::kCount)] = {};
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
// Clearing 64bpp color.
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
// Clearing float depth without ROV, both the float24 and the host float32
// versions.
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
// Nvidia Maxwell 1st generation and older.

View File

@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
return false;
}
// Create the loading pipeline state objects.
// Create the loading pipelines.
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i];
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader, mode_info.shader_size, load_root_signature_);
if (load_pipeline_states_[i] == nullptr) {
XELOGE(
"Failed to create the texture loading pipeline state object for mode "
"{}",
i);
if (load_pipelines_[i] == nullptr) {
XELOGE("Failed to create the texture loading pipeline for mode {}", i);
Shutdown();
return false;
}
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader_2x, mode_info.shader_2x_size,
load_root_signature_);
if (load_pipeline_states_2x_[i] == nullptr) {
if (load_pipelines_2x_[i] == nullptr) {
XELOGE(
"Failed to create the 2x-scaled texture loading pipeline state "
"for mode {}",
"Failed to create the 2x-scaled texture loading pipeline for mode "
"{}",
i);
Shutdown();
return false;
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(load_root_signature_);
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
if (IsResolutionScale2X() && key.tiled) {
LoadMode load_mode = GetLoadMode(key);
if (load_mode != LoadMode::kUnknown &&
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) {
load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
uint32_t base_size = 0, mip_size = 0;
texture_util::GetTextureTotalSize(
key.dimension, key.width, key.height, key.depth, key.format,
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
}
texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr;
textures_.insert(std::make_pair(map_key, texture));
textures_.emplace(map_key, texture);
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size;
COUNT_profile_set("gpu/texture_cache/total_size_mb",
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false;
}
bool scaled_resolve = texture->key.scaled_resolve ? true : false;
ID3D12PipelineState* pipeline_state =
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)]
: load_pipeline_states_[uint32_t(load_mode)];
if (pipeline_state == nullptr) {
ID3D12PipelineState* pipeline = scaled_resolve
? load_pipelines_2x_[uint32_t(load_mode)]
: load_pipelines_[uint32_t(load_mode)];
if (pipeline == nullptr) {
return false;
}
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_mode_info.srv_bpe_log2);
}
}
command_processor_.SetComputePipelineState(pipeline_state);
command_processor_.SetComputePipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
}
device->CreateShaderResourceView(
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
texture.srv_descriptors.insert({descriptor_key, descriptor_index});
texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
return descriptor_index;
}

View File

@ -550,9 +550,9 @@ class TextureCache {
static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {};
// Load pipeline state objects for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {};
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
// Load pipelines for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_;
uint64_t textures_total_size_ = 0;

View File

@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
DxbcSrc::LU(~uint32_t(3)));
}
// Add the word offset from the instruction, plus the offset of the first
// needed word within the element.
// Add the word offset from the instruction (signed), plus the offset of the
// first needed word within the element.
uint32_t first_word_index;
xe::bit_scan_forward(needed_words, &first_word_index);
int32_t first_word_buffer_offset =

View File

@ -65,17 +65,17 @@ enum class InstructionStorageTarget {
// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
// skipped components and zeros, which cannot be encoded, and therefore it will
// not).
constexpr uint32_t GetInstructionStorageTargetUsedComponents(
constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
InstructionStorageTarget target) {
switch (target) {
case InstructionStorageTarget::kNone:
return 0b0000;
return 0;
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
return 0b0111;
return 3;
case InstructionStorageTarget::kDepth:
return 0b0001;
return 1;
default:
return 0b1111;
return 4;
}
}
@ -136,8 +136,9 @@ struct InstructionResult {
// Returns the write mask containing only components actually present in the
// target.
uint32_t GetUsedWriteMask() const {
return original_write_mask &
GetInstructionStorageTargetUsedComponents(storage_target);
uint32_t target_component_count =
GetInstructionStorageTargetUsedComponentCount(storage_target);
return original_write_mask & ((1 << target_component_count) - 1);
}
// True if the components are in their 'standard' swizzle arrangement (xyzw).
bool IsStandardSwizzle() const {
@ -161,6 +162,28 @@ struct InstructionResult {
}
return used_components;
}
// Returns which components of the used write mask are constant, and what
// values they have.
uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const {
uint32_t constant_components = 0;
uint32_t constant_values = 0;
uint32_t used_write_mask = GetUsedWriteMask();
for (uint32_t i = 0; i < 4; ++i) {
if (!(used_write_mask & (1 << i))) {
continue;
}
SwizzleSource component = components[i];
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
continue;
}
constant_components |= 1 << i;
if (component == SwizzleSource::k1) {
constant_values |= 1 << i;
}
}
constant_values_out = constant_values;
return constant_components;
}
};
enum class InstructionStorageSource {

View File

@ -25,6 +25,9 @@ namespace gpu {
// system page size granularity.
class SharedMemory {
public:
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
virtual ~SharedMemory();
// Call in the implementation-specific ClearCache.
virtual void ClearCache();
@ -98,9 +101,6 @@ class SharedMemory {
// destructor.
void ShutdownCommon();
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
// Sparse allocations are 4 MB - large enough that not too many of them are
// allocated, but small enough not to waste too much memory on padding (with
// 16 MB, too much is wasted).

View File

@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
return false;
}
// Create the pipeline states.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {};
pipeline_state_desc.pRootSignature = root_signature_;
pipeline_state_desc.VS.pShaderBytecode = immediate_vs;
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_state_desc.PS.pShaderBytecode = immediate_ps;
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps);
// Create the pipelines.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
pipeline_desc.pRootSignature = root_signature_;
pipeline_desc.VS.pShaderBytecode = immediate_vs;
pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_desc.PS.pShaderBytecode = immediate_ps;
pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
pipeline_state_desc.BlendState.RenderTarget[0];
pipeline_desc.BlendState.RenderTarget[0];
pipeline_blend_desc.BlendEnable = TRUE;
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
D3D12_COLOR_WRITE_ENABLE_GREEN |
D3D12_COLOR_WRITE_ENABLE_BLUE;
pipeline_state_desc.SampleMask = UINT_MAX;
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE;
pipeline_desc.SampleMask = UINT_MAX;
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
pipeline_input_elements[0].SemanticName = "POSITION";
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_input_elements[2].AlignedByteOffset =
offsetof(ImmediateVertex, color);
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_state_desc.InputLayout.NumElements =
pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_desc.InputLayout.NumElements =
UINT(xe::countof(pipeline_input_elements));
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_state_desc.NumRenderTargets = 1;
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_state_desc.SampleDesc.Count = 1;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_desc.NumRenderTargets = 1;
pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer triangle pipeline "
"state");
Shutdown();
return false;
}
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer line pipeline "
"state");
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
util::ReleaseAndNull(sampler_heap_);
util::ReleaseAndNull(pipeline_state_line_);
util::ReleaseAndNull(pipeline_state_triangle_);
util::ReleaseAndNull(pipeline_line_);
util::ReleaseAndNull(pipeline_triangle_);
util::ReleaseAndNull(root_signature_);
}
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
uint32_t(sampler_index)));
}
// Set the primitive type and the pipeline state for it.
// Set the primitive type and the pipeline for it.
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
ID3D12PipelineState* pipeline_state;
ID3D12PipelineState* pipeline;
switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
pipeline_state = pipeline_state_line_;
pipeline = pipeline_line_;
break;
case ImmediatePrimitiveType::kTriangles:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
pipeline_state = pipeline_state_triangle_;
pipeline = pipeline_triangle_;
break;
default:
assert_unhandled_case(draw.primitive_type);
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (current_primitive_topology_ != primitive_topology) {
current_primitive_topology_ = primitive_topology;
current_command_list_->IASetPrimitiveTopology(primitive_topology);
current_command_list_->SetPipelineState(pipeline_state);
current_command_list_->SetPipelineState(pipeline);
}
// Draw.

View File

@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
kCount
};
ID3D12PipelineState* pipeline_state_triangle_ = nullptr;
ID3D12PipelineState* pipeline_state_line_ = nullptr;
ID3D12PipelineState* pipeline_triangle_ = nullptr;
ID3D12PipelineState* pipeline_line_ = nullptr;
ID3D12DescriptorHeap* sampler_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;

View File

@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
return root_signature;
}
ID3D12PipelineState* CreateComputePipelineState(
ID3D12PipelineState* CreateComputePipeline(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature) {
D3D12_COMPUTE_PIPELINE_STATE_DESC desc;

View File

@ -39,8 +39,9 @@ inline bool ReleaseAndNull(T& object) {
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
const D3D12_ROOT_SIGNATURE_DESC& desc);
ID3D12PipelineState* CreateComputePipelineState(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
const void* shader,
size_t shader_size,
ID3D12RootSignature* root_signature);
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {

View File

@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() {
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out) {
assert_not_zero(alignment);
alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment);
assert_true(size <= page_size_);
@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial(
uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out, size_t& size_out) {
assert_not_zero(alignment);
alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment);
size = std::min(size, page_size_);