[D3D12] Cleanup: pipeline state -> pipeline, other things
This commit is contained in:
parent
87a3c5fac2
commit
6b988d43c7
|
@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
sampler_count_vertex);
|
||||
return nullptr;
|
||||
}
|
||||
root_signatures_bindful_.insert({index, root_signature});
|
||||
root_signatures_bindful_.emplace(index, root_signature);
|
||||
return root_signature;
|
||||
}
|
||||
|
||||
|
@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions(
|
|||
current_sample_positions_ = sample_positions;
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::SetComputePipelineState(
|
||||
ID3D12PipelineState* pipeline_state) {
|
||||
if (current_external_pipeline_state_ != pipeline_state) {
|
||||
deferred_command_list_.D3DSetPipelineState(pipeline_state);
|
||||
current_external_pipeline_state_ = pipeline_state;
|
||||
current_cached_pipeline_state_ = nullptr;
|
||||
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
|
||||
if (current_external_pipeline_ != pipeline) {
|
||||
deferred_command_list_.D3DSetPipelineState(pipeline);
|
||||
current_external_pipeline_ = pipeline;
|
||||
current_cached_pipeline_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
|
|||
}
|
||||
// Currently scaling is only supported with ROV.
|
||||
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
|
||||
return "Direct3D 12 - 2x";
|
||||
return "Direct3D 12 - ROV 2x";
|
||||
}
|
||||
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
|
||||
// that faces adoption complications (outside of Direct3D - on Vulkan - at
|
||||
// least), but crucial to Xenia - raise awareness of its usage.
|
||||
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
|
||||
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
|
||||
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
|
||||
// feature" - oscarbg in that issue.
|
||||
return "Direct3D 12 - ROV";
|
||||
}
|
||||
return "Direct3D 12";
|
||||
}
|
||||
|
@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
|
||||
texture_cache_->IsResolutionScale2X() ? 2 : 1);
|
||||
if (!pipeline_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the graphics pipeline state cache");
|
||||
XELOGE("Failed to initialize the graphics pipeline cache");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() {
|
|||
// Shut down binding - bindless descriptors may be owned by subsystems like
|
||||
// the texture cache.
|
||||
|
||||
// Root signatured are used by pipeline states, thus freed after the pipeline
|
||||
// states.
|
||||
// Root signatures are used by pipelines, thus freed after the pipelines.
|
||||
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
|
||||
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
|
||||
for (auto it : root_signatures_bindful_) {
|
||||
|
@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
xenos::VertexShaderExportMode::kMultipass ||
|
||||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
|
||||
pa_su_sc_mode_cntl.cull_back))) {
|
||||
// All faces are culled - can't be expressed in the pipeline state.
|
||||
// All faces are culled - can't be expressed in the pipeline.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
line_loop_closing_index = 0;
|
||||
}
|
||||
|
||||
// Update the textures - this may bind pipeline state objects.
|
||||
// Update the textures - this may bind pipelines.
|
||||
uint32_t used_texture_mask =
|
||||
vertex_shader->GetUsedTextureMask() |
|
||||
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
|
@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
early_z = true;
|
||||
}
|
||||
|
||||
// Create the pipeline state object if needed and bind it.
|
||||
void* pipeline_state_handle;
|
||||
// Create the pipeline if needed and bind it.
|
||||
void* pipeline_handle;
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
vertex_shader, pixel_shader, primitive_type_converted,
|
||||
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
|
||||
early_z, pipeline_render_targets, &pipeline_state_handle,
|
||||
early_z, pipeline_render_targets, &pipeline_handle,
|
||||
&root_signature)) {
|
||||
return false;
|
||||
}
|
||||
if (current_cached_pipeline_state_ != pipeline_state_handle) {
|
||||
if (current_cached_pipeline_ != pipeline_handle) {
|
||||
deferred_command_list_.SetPipelineStateHandle(
|
||||
reinterpret_cast<void*>(pipeline_state_handle));
|
||||
current_cached_pipeline_state_ = pipeline_state_handle;
|
||||
current_external_pipeline_state_ = nullptr;
|
||||
reinterpret_cast<void*>(pipeline_handle));
|
||||
current_cached_pipeline_ = pipeline_handle;
|
||||
current_external_pipeline_ = nullptr;
|
||||
}
|
||||
|
||||
// Update viewport, scissor, blend factor and stencil reference.
|
||||
|
@ -2005,14 +2011,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
// Must not call anything that can change the descriptor heap from now on!
|
||||
|
||||
// Ensure vertex and index buffers are resident and draw.
|
||||
// Ensure vertex buffers are resident.
|
||||
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
|
||||
// validity will be tracked.
|
||||
// validity is tracked.
|
||||
uint64_t vertex_buffers_resident[2] = {};
|
||||
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
|
||||
for (const Shader::VertexBinding& vertex_binding :
|
||||
vertex_shader->vertex_bindings()) {
|
||||
uint32_t vfetch_index = vertex_binding.fetch_constant;
|
||||
if (vertex_buffers_resident[vfetch_index >> 6] &
|
||||
(1ull << (vfetch_index & 63))) {
|
||||
(uint64_t(1) << (vfetch_index & 63))) {
|
||||
continue;
|
||||
}
|
||||
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
|
@ -2045,7 +2052,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
vfetch_constant.address << 2, vfetch_constant.size << 2);
|
||||
return false;
|
||||
}
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
|
||||
<< (vfetch_index & 63);
|
||||
}
|
||||
|
||||
// Gather memexport ranges and ensure the heaps for them are resident, and
|
||||
|
@ -2517,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
|||
submission_open_ = true;
|
||||
|
||||
// Start a new deferred command list - will submit it to the real one at the
|
||||
// end of the submission (when async pipeline state object creation requests
|
||||
// are fulfilled).
|
||||
// end of the submission (when async pipeline creation requests are
|
||||
// fulfilled).
|
||||
deferred_command_list_.Reset();
|
||||
|
||||
// Reset cached state of the command list.
|
||||
|
@ -2527,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
|||
ff_blend_factor_update_needed_ = true;
|
||||
ff_stencil_ref_update_needed_ = true;
|
||||
current_sample_positions_ = xenos::MsaaSamples::k1X;
|
||||
current_cached_pipeline_state_ = nullptr;
|
||||
current_external_pipeline_state_ = nullptr;
|
||||
current_cached_pipeline_ = nullptr;
|
||||
current_external_pipeline_ = nullptr;
|
||||
current_graphics_root_signature_ = nullptr;
|
||||
current_graphics_root_up_to_date_ = 0;
|
||||
if (bindless_resources_used_) {
|
||||
|
@ -2724,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
|
|||
}
|
||||
|
||||
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
|
||||
|
@ -2745,12 +2753,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
|
|||
}
|
||||
|
||||
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
|
||||
// Window parameters.
|
||||
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
|
||||
// See r200UpdateWindow:
|
||||
|
@ -2846,14 +2854,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
scissor.right = pa_sc_window_scissor_br.br_x;
|
||||
scissor.bottom = pa_sc_window_scissor_br.br_y;
|
||||
if (!pa_sc_window_scissor_tl.window_offset_disable) {
|
||||
scissor.left =
|
||||
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.top =
|
||||
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
scissor.right =
|
||||
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.bottom =
|
||||
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
scissor.left = std::max(
|
||||
LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
|
||||
scissor.top = std::max(
|
||||
LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
|
||||
scissor.right = std::max(
|
||||
LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
|
||||
scissor.bottom = std::max(
|
||||
LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
|
||||
}
|
||||
scissor.left *= pixel_size_x;
|
||||
scissor.top *= pixel_size_y;
|
||||
|
@ -2915,12 +2923,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t line_loop_closing_index, xenos::Endian index_endian,
|
||||
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
|
@ -3103,14 +3110,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
|
||||
system_constants_.line_loop_closing_index = line_loop_closing_index;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
system_constants_.vertex_base_index = vgt_indx_offset;
|
||||
|
||||
// Index or tessellation edge factor buffer endianness.
|
||||
dirty |= system_constants_.vertex_index_endian != index_endian;
|
||||
system_constants_.vertex_index_endian = index_endian;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
system_constants_.vertex_base_index = vgt_indx_offset;
|
||||
|
||||
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
||||
if (!pa_cl_clip_cntl.clip_disable) {
|
||||
for (uint32_t i = 0; i < 6; ++i) {
|
||||
|
@ -3574,7 +3581,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
float_constant_map_vertex.float_bitmap[i];
|
||||
// If no float constants at all, we can reuse any buffer for them, so not
|
||||
// invalidating.
|
||||
if (float_constant_map_vertex.float_count != 0) {
|
||||
if (float_constant_count_vertex) {
|
||||
cbuffer_binding_float_vertex_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
|
@ -3589,7 +3596,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
float_constant_map_pixel.float_bitmap[i]) {
|
||||
current_float_constant_map_pixel_[i] =
|
||||
float_constant_map_pixel.float_bitmap[i];
|
||||
if (float_constant_map_pixel.float_count != 0) {
|
||||
if (float_constant_count_pixel) {
|
||||
cbuffer_binding_float_pixel_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
|
@ -3889,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
sampler_parameters,
|
||||
provider.OffsetSamplerDescriptor(
|
||||
sampler_bindless_heap_cpu_start_, sampler_index));
|
||||
texture_cache_bindless_sampler_map_.insert(
|
||||
{sampler_parameters.value, sampler_index});
|
||||
texture_cache_bindless_sampler_map_.emplace(
|
||||
sampler_parameters.value, sampler_index);
|
||||
}
|
||||
current_sampler_bindless_indices_vertex_[j] = sampler_index;
|
||||
}
|
||||
|
@ -3921,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
sampler_parameters,
|
||||
provider.OffsetSamplerDescriptor(
|
||||
sampler_bindless_heap_cpu_start_, sampler_index));
|
||||
texture_cache_bindless_sampler_map_.insert(
|
||||
{sampler_parameters.value, sampler_index});
|
||||
texture_cache_bindless_sampler_map_.emplace(
|
||||
sampler_parameters.value, sampler_index);
|
||||
}
|
||||
current_sampler_bindless_indices_pixel_[j] = sampler_index;
|
||||
}
|
||||
|
|
|
@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// render targets or copying to depth render targets.
|
||||
void SetSamplePositions(xenos::MsaaSamples sample_positions);
|
||||
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if the pipeline could not be created.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
|
||||
return pipeline_cache_->GetD3D12PipelineByHandle(handle);
|
||||
}
|
||||
|
||||
// Sets the current pipeline state to a compute one. This is for cache
|
||||
// invalidation primarily. A submission must be open.
|
||||
void SetComputePipelineState(ID3D12PipelineState* pipeline_state);
|
||||
// Sets the current pipeline to a compute one. This is for cache invalidation
|
||||
// primarily. A submission must be open.
|
||||
void SetComputePipeline(ID3D12PipelineState* pipeline);
|
||||
|
||||
// For the pipeline state cache to call when binding layout UIDs may be
|
||||
// reused.
|
||||
// For the pipeline cache to call when binding layout UIDs may be reused.
|
||||
void NotifyShaderBindingsLayoutUIDsInvalidated();
|
||||
|
||||
// Returns the text to display in the GPU backend name in the window title.
|
||||
|
@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
bool EndSubmission(bool is_swap);
|
||||
// Checks if ending a submission right now would not cause potentially more
|
||||
// delay than it would reduce by making the GPU start working earlier - such
|
||||
// as when there are unfinished graphics pipeline state creation requests that
|
||||
// would need to be fulfilled before actually submitting the command list.
|
||||
// as when there are unfinished graphics pipeline creation requests that would
|
||||
// need to be fulfilled before actually submitting the command list.
|
||||
bool CanEndSubmissionImmediately() const;
|
||||
bool AwaitAllQueueOperationsCompletion() {
|
||||
CheckSubmissionFence(submission_current_);
|
||||
|
@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// Current SSAA sample positions (to be updated by the render target cache).
|
||||
xenos::MsaaSamples current_sample_positions_;
|
||||
|
||||
// Currently bound pipeline state, either a graphics pipeline state object
|
||||
// from the pipeline state cache (with potentially deferred creation -
|
||||
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos
|
||||
// graphics or compute pipeline state object (current_cached_pipeline_state_
|
||||
// is nullptr in this case).
|
||||
void* current_cached_pipeline_state_;
|
||||
ID3D12PipelineState* current_external_pipeline_state_;
|
||||
// Currently bound pipeline, either a graphics pipeline from the pipeline
|
||||
// cache (with potentially deferred creation - current_external_pipeline_ is
|
||||
// nullptr in this case) or a non-Xenos graphics or compute pipeline
|
||||
// (current_cached_pipeline_ is nullptr in this case).
|
||||
void* current_cached_pipeline_;
|
||||
ID3D12PipelineState* current_external_pipeline_;
|
||||
|
||||
// Currently bound graphics root signature.
|
||||
ID3D12RootSignature* current_graphics_root_signature_;
|
||||
|
|
|
@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
stretch_pipeline_desc.SampleDesc.Count = 1;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
|
||||
XELOGE("Failed to create the front buffer stretch pipeline state");
|
||||
XELOGE("Failed to create the front buffer stretch pipeline");
|
||||
stretch_gamma_root_signature_->Release();
|
||||
stretch_gamma_root_signature_ = nullptr;
|
||||
stretch_root_signature_->Release();
|
||||
|
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the gamma-correcting front buffer stretch "
|
||||
"pipeline state");
|
||||
"Failed to create the gamma-correcting front buffer stretch pipeline");
|
||||
stretch_pipeline_->Release();
|
||||
stretch_pipeline_ = nullptr;
|
||||
stretch_gamma_root_signature_->Release();
|
||||
|
|
|
@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
|
|||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
// For owning subsystems like the pipeline state cache, accessors for unique
|
||||
// For owning subsystems like the pipeline cache, accessors for unique
|
||||
// identifiers (used instead of hashes to make sure collisions can't happen)
|
||||
// of binding layouts used by the shader, for invalidation if a shader with an
|
||||
// incompatible layout was bound.
|
||||
|
|
|
@ -209,8 +209,7 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
}
|
||||
} break;
|
||||
case Command::kSetPipelineStateHandle: {
|
||||
current_pipeline_state =
|
||||
command_processor_.GetD3D12PipelineStateByHandle(
|
||||
current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
|
||||
*reinterpret_cast<void* const*>(stream));
|
||||
if (current_pipeline_state) {
|
||||
command_list->SetPipelineState(current_pipeline_state);
|
||||
|
|
|
@ -43,10 +43,10 @@ DEFINE_bool(
|
|||
"D3D12");
|
||||
DEFINE_int32(
|
||||
d3d12_pipeline_creation_threads, -1,
|
||||
"Number of threads used for graphics pipeline state object creation. -1 to "
|
||||
"calculate automatically (75% of logical CPU cores), a positive number to "
|
||||
"specify the number of threads explicitly (up to the number of logical CPU "
|
||||
"cores), 0 to disable multithreaded pipeline state object creation.",
|
||||
"Number of threads used for graphics pipeline creation. -1 to calculate "
|
||||
"automatically (75% of logical CPU cores), a positive number to specify "
|
||||
"the number of threads explicitly (up to the number of logical CPU cores), "
|
||||
"0 to disable multithreaded pipeline creation.",
|
||||
"D3D12");
|
||||
DEFINE_bool(d3d12_tessellation_wireframe, false,
|
||||
"Display tessellated surfaces as wireframe for debugging.",
|
||||
|
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
|
|||
logical_processor_count = 6;
|
||||
}
|
||||
// Initialize creation thread synchronization data even if not using creation
|
||||
// threads because they may be used anyway to create pipeline state objects
|
||||
// from the storage.
|
||||
// threads because they may be used anyway to create pipelines from the
|
||||
// storage.
|
||||
creation_threads_busy_ = 0;
|
||||
creation_completion_event_ =
|
||||
xe::threading::Event::CreateManualResetEvent(true);
|
||||
|
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
|
|||
for (size_t i = 0; i < creation_thread_count; ++i) {
|
||||
std::unique_ptr<xe::threading::Thread> creation_thread =
|
||||
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
|
||||
creation_thread->set_name("D3D12 Pipeline States");
|
||||
creation_thread->set_name("D3D12 Pipelines");
|
||||
creation_threads_.push_back(std::move(creation_thread));
|
||||
}
|
||||
}
|
||||
|
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
ShutdownShaderStorage();
|
||||
|
||||
// Remove references to the current pipeline state object.
|
||||
current_pipeline_state_ = nullptr;
|
||||
// Remove references to the current pipeline.
|
||||
current_pipeline_ = nullptr;
|
||||
|
||||
if (!creation_threads_.empty()) {
|
||||
// Empty the pipeline state object creation queue and make sure there are no
|
||||
// threads currently creating pipeline state objects because pipeline states
|
||||
// are going to be deleted.
|
||||
// Empty the pipeline creation queue and make sure there are no threads
|
||||
// currently creating pipelines because pipelines are going to be deleted.
|
||||
bool await_creation_completion_event = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
|
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
}
|
||||
|
||||
// Destroy all pipeline state objects.
|
||||
for (auto it : pipeline_states_) {
|
||||
// Destroy all pipelines.
|
||||
for (auto it : pipelines_) {
|
||||
it.second->state->Release();
|
||||
delete it.second;
|
||||
}
|
||||
pipeline_states_.clear();
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
|
||||
pipelines_.clear();
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
|
||||
|
||||
// Destroy all shaders.
|
||||
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
|
||||
|
@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
texture_binding_layout_map_.clear();
|
||||
texture_binding_layouts_.clear();
|
||||
for (auto it : shader_map_) {
|
||||
for (auto it : shaders_) {
|
||||
delete it.second;
|
||||
}
|
||||
shader_map_.clear();
|
||||
shaders_.clear();
|
||||
|
||||
if (reinitialize_shader_storage) {
|
||||
InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
|
||||
|
@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
size_t ucode_byte_count =
|
||||
shader_header.ucode_dword_count * sizeof(uint32_t);
|
||||
if (shader_map_.find(shader_header.ucode_data_hash) !=
|
||||
shader_map_.end()) {
|
||||
if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
|
||||
// Already added - usually shaders aren't added without the intention of
|
||||
// translating them imminently, so don't do additional checks to
|
||||
// actually ensure that translation happens right now (they would cause
|
||||
|
@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
D3D12Shader* shader =
|
||||
new D3D12Shader(shader_header.type, ucode_data_hash,
|
||||
ucode_dwords.data(), shader_header.ucode_dword_count);
|
||||
shader_map_.insert({ucode_data_hash, shader});
|
||||
shaders_.emplace(ucode_data_hash, shader);
|
||||
// Create new threads if the currently existing threads can't keep up with
|
||||
// file reading, but not more than the number of logical processors minus
|
||||
// one.
|
||||
|
@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
shader_translation_threads.clear();
|
||||
for (D3D12Shader* shader : shaders_failed_to_translate) {
|
||||
shader_map_.erase(shader->ucode_data_hash());
|
||||
shaders_.erase(shader->ucode_data_hash());
|
||||
delete shader;
|
||||
}
|
||||
}
|
||||
|
@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
|
||||
// 'DXRO' or 'DXRT'.
|
||||
const uint32_t pipeline_state_storage_magic_api =
|
||||
const uint32_t pipeline_storage_magic_api =
|
||||
edram_rov_used_ ? 0x4F525844 : 0x54525844;
|
||||
|
||||
// Initialize the pipeline state storage stream.
|
||||
uint64_t pipeline_state_storage_initialization_start_ =
|
||||
// Initialize the pipeline storage stream.
|
||||
uint64_t pipeline_storage_initialization_start_ =
|
||||
xe::Clock::QueryHostTickCount();
|
||||
auto pipeline_state_storage_file_path =
|
||||
auto pipeline_storage_file_path =
|
||||
shader_storage_shareable_root /
|
||||
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
|
||||
edram_rov_used_ ? "rov" : "rtv");
|
||||
pipeline_state_storage_file_ =
|
||||
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b");
|
||||
if (!pipeline_state_storage_file_) {
|
||||
pipeline_storage_file_ =
|
||||
xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
|
||||
if (!pipeline_storage_file_) {
|
||||
XELOGE(
|
||||
"Failed to open the Direct3D 12 pipeline state description storage "
|
||||
"file for writing, persistent shader storage will be disabled: {}",
|
||||
xe::path_to_utf8(pipeline_state_storage_file_path));
|
||||
"Failed to open the Direct3D 12 pipeline description storage file for "
|
||||
"writing, persistent shader storage will be disabled: {}",
|
||||
xe::path_to_utf8(pipeline_storage_file_path));
|
||||
fclose(shader_storage_file_);
|
||||
shader_storage_file_ = nullptr;
|
||||
return;
|
||||
}
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
// 'XEPS'.
|
||||
const uint32_t pipeline_state_storage_magic = 0x53504558;
|
||||
const uint32_t pipeline_storage_magic = 0x53504558;
|
||||
struct {
|
||||
uint32_t magic;
|
||||
uint32_t magic_api;
|
||||
uint32_t version_swapped;
|
||||
} pipeline_state_storage_file_header;
|
||||
if (fread(&pipeline_state_storage_file_header,
|
||||
sizeof(pipeline_state_storage_file_header), 1,
|
||||
pipeline_state_storage_file_) &&
|
||||
pipeline_state_storage_file_header.magic ==
|
||||
pipeline_state_storage_magic &&
|
||||
pipeline_state_storage_file_header.magic_api ==
|
||||
pipeline_state_storage_magic_api &&
|
||||
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
|
||||
} pipeline_storage_file_header;
|
||||
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_) &&
|
||||
pipeline_storage_file_header.magic == pipeline_storage_magic &&
|
||||
pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
|
||||
xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
|
||||
PipelineDescription::kVersion) {
|
||||
uint64_t pipeline_state_storage_valid_bytes =
|
||||
sizeof(pipeline_state_storage_file_header);
|
||||
// Enqueue pipeline state descriptions written by previous Xenia executions
|
||||
// until the end of the file or until a corrupted one is detected.
|
||||
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END);
|
||||
int64_t pipeline_state_storage_told_end =
|
||||
xe::filesystem::Tell(pipeline_state_storage_file_);
|
||||
size_t pipeline_state_storage_told_count =
|
||||
size_t(pipeline_state_storage_told_end >=
|
||||
int64_t(pipeline_state_storage_valid_bytes)
|
||||
? (uint64_t(pipeline_state_storage_told_end) -
|
||||
pipeline_state_storage_valid_bytes) /
|
||||
uint64_t pipeline_storage_valid_bytes =
|
||||
sizeof(pipeline_storage_file_header);
|
||||
// Enqueue pipeline descriptions written by previous Xenia executions until
|
||||
// the end of the file or until a corrupted one is detected.
|
||||
xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
|
||||
int64_t pipeline_storage_told_end =
|
||||
xe::filesystem::Tell(pipeline_storage_file_);
|
||||
size_t pipeline_storage_told_count = size_t(
|
||||
pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
|
||||
? (uint64_t(pipeline_storage_told_end) -
|
||||
pipeline_storage_valid_bytes) /
|
||||
sizeof(PipelineStoredDescription)
|
||||
: 0);
|
||||
if (pipeline_state_storage_told_count &&
|
||||
xe::filesystem::Seek(pipeline_state_storage_file_,
|
||||
int64_t(pipeline_state_storage_valid_bytes),
|
||||
SEEK_SET)) {
|
||||
if (pipeline_storage_told_count &&
|
||||
xe::filesystem::Seek(pipeline_storage_file_,
|
||||
int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
|
||||
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
|
||||
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count);
|
||||
pipeline_stored_descriptions.resize(fread(
|
||||
pipeline_stored_descriptions.data(),
|
||||
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count,
|
||||
pipeline_state_storage_file_));
|
||||
pipeline_stored_descriptions.resize(pipeline_storage_told_count);
|
||||
pipeline_stored_descriptions.resize(
|
||||
fread(pipeline_stored_descriptions.data(),
|
||||
sizeof(PipelineStoredDescription), pipeline_storage_told_count,
|
||||
pipeline_storage_file_));
|
||||
if (!pipeline_stored_descriptions.empty()) {
|
||||
// Launch additional creation threads to use all cores to create
|
||||
// pipeline state objects faster. Will also be using the main thread, so
|
||||
// minus 1.
|
||||
// pipelines faster. Will also be using the main thread, so minus 1.
|
||||
size_t creation_thread_original_count = creation_threads_.size();
|
||||
size_t creation_thread_needed_count =
|
||||
std::max(std::min(pipeline_stored_descriptions.size(),
|
||||
|
@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
|
|||
{}, [this, creation_thread_index]() {
|
||||
CreationThread(creation_thread_index);
|
||||
});
|
||||
creation_thread->set_name("D3D12 Pipeline States Additional");
|
||||
creation_thread->set_name("D3D12 Pipelines");
|
||||
creation_threads_.push_back(std::move(creation_thread));
|
||||
}
|
||||
size_t pipeline_states_created = 0;
|
||||
size_t pipelines_created = 0;
|
||||
for (const PipelineStoredDescription& pipeline_stored_description :
|
||||
pipeline_stored_descriptions) {
|
||||
const PipelineDescription& pipeline_description =
|
||||
|
@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage(
|
|||
0) != pipeline_stored_description.description_hash) {
|
||||
break;
|
||||
}
|
||||
pipeline_state_storage_valid_bytes +=
|
||||
sizeof(PipelineStoredDescription);
|
||||
// Skip already known pipeline states - those have already been
|
||||
// enqueued.
|
||||
auto found_range = pipeline_states_.equal_range(
|
||||
pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
|
||||
// Skip already known pipelines - those have already been enqueued.
|
||||
auto found_range = pipelines_.equal_range(
|
||||
pipeline_stored_description.description_hash);
|
||||
bool pipeline_state_found = false;
|
||||
bool pipeline_found = false;
|
||||
for (auto it = found_range.first; it != found_range.second; ++it) {
|
||||
PipelineState* found_pipeline_state = it->second;
|
||||
if (!std::memcmp(&found_pipeline_state->description.description,
|
||||
Pipeline* found_pipeline = it->second;
|
||||
if (!std::memcmp(&found_pipeline->description.description,
|
||||
&pipeline_description,
|
||||
sizeof(pipeline_description))) {
|
||||
pipeline_state_found = true;
|
||||
pipeline_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pipeline_state_found) {
|
||||
if (pipeline_found) {
|
||||
continue;
|
||||
}
|
||||
|
||||
PipelineRuntimeDescription pipeline_runtime_description;
|
||||
auto vertex_shader_it =
|
||||
shader_map_.find(pipeline_description.vertex_shader_hash);
|
||||
if (vertex_shader_it == shader_map_.end()) {
|
||||
shaders_.find(pipeline_description.vertex_shader_hash);
|
||||
if (vertex_shader_it == shaders_.end()) {
|
||||
continue;
|
||||
}
|
||||
pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
|
||||
|
@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
if (pipeline_description.pixel_shader_hash) {
|
||||
auto pixel_shader_it =
|
||||
shader_map_.find(pipeline_description.pixel_shader_hash);
|
||||
if (pixel_shader_it == shader_map_.end()) {
|
||||
shaders_.find(pipeline_description.pixel_shader_hash);
|
||||
if (pixel_shader_it == shaders_.end()) {
|
||||
continue;
|
||||
}
|
||||
pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
|
||||
|
@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
|
|||
std::memcpy(&pipeline_runtime_description.description,
|
||||
&pipeline_description, sizeof(pipeline_description));
|
||||
|
||||
PipelineState* new_pipeline_state = new PipelineState;
|
||||
new_pipeline_state->state = nullptr;
|
||||
std::memcpy(&new_pipeline_state->description,
|
||||
&pipeline_runtime_description,
|
||||
Pipeline* new_pipeline = new Pipeline;
|
||||
new_pipeline->state = nullptr;
|
||||
std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
|
||||
sizeof(pipeline_runtime_description));
|
||||
pipeline_states_.insert(
|
||||
std::make_pair(pipeline_stored_description.description_hash,
|
||||
new_pipeline_state));
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
|
||||
pipeline_states_.size());
|
||||
pipelines_.emplace(pipeline_stored_description.description_hash,
|
||||
new_pipeline);
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
|
||||
if (!creation_threads_.empty()) {
|
||||
// Submit the pipeline for creation to any available thread.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_queue_.push_back(new_pipeline_state);
|
||||
creation_queue_.push_back(new_pipeline);
|
||||
}
|
||||
creation_request_cond_.notify_one();
|
||||
} else {
|
||||
new_pipeline_state->state =
|
||||
CreateD3D12PipelineState(pipeline_runtime_description);
|
||||
new_pipeline->state =
|
||||
CreateD3D12Pipeline(pipeline_runtime_description);
|
||||
}
|
||||
++pipeline_states_created;
|
||||
++pipelines_created;
|
||||
}
|
||||
CreateQueuedPipelineStatesOnProcessorThread();
|
||||
CreateQueuedPipelinesOnProcessorThread();
|
||||
if (creation_threads_.size() > creation_thread_original_count) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_threads_shutdown_from_ = creation_thread_original_count;
|
||||
// Assuming the queue is empty because of
|
||||
// CreateQueuedPipelineStatesOnProcessorThread.
|
||||
// CreateQueuedPipelinesOnProcessorThread.
|
||||
}
|
||||
creation_request_cond_.notify_all();
|
||||
while (creation_threads_.size() > creation_thread_original_count) {
|
||||
|
@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
}
|
||||
XELOGGPU(
|
||||
"Created {} graphics pipeline state objects from the storage in {} "
|
||||
"milliseconds",
|
||||
pipeline_states_created,
|
||||
"Created {} graphics pipelines from the storage in {} milliseconds",
|
||||
pipelines_created,
|
||||
(xe::Clock::QueryHostTickCount() -
|
||||
pipeline_state_storage_initialization_start_) *
|
||||
pipeline_storage_initialization_start_) *
|
||||
1000 / xe::Clock::QueryHostTickFrequency());
|
||||
}
|
||||
}
|
||||
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_,
|
||||
pipeline_state_storage_valid_bytes);
|
||||
xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
|
||||
pipeline_storage_valid_bytes);
|
||||
} else {
|
||||
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0);
|
||||
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic;
|
||||
pipeline_state_storage_file_header.magic_api =
|
||||
pipeline_state_storage_magic_api;
|
||||
pipeline_state_storage_file_header.version_swapped =
|
||||
xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
|
||||
pipeline_storage_file_header.magic = pipeline_storage_magic;
|
||||
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
|
||||
pipeline_storage_file_header.version_swapped =
|
||||
xe::byte_swap(PipelineDescription::kVersion);
|
||||
fwrite(&pipeline_state_storage_file_header,
|
||||
sizeof(pipeline_state_storage_file_header), 1,
|
||||
pipeline_state_storage_file_);
|
||||
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_);
|
||||
}
|
||||
|
||||
shader_storage_root_ = storage_root;
|
||||
|
@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
|
||||
// Start the storage writing thread.
|
||||
storage_write_flush_shaders_ = false;
|
||||
storage_write_flush_pipeline_states_ = false;
|
||||
storage_write_flush_pipelines_ = false;
|
||||
storage_write_thread_shutdown_ = false;
|
||||
storage_write_thread_ =
|
||||
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
|
||||
|
@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
|
|||
storage_write_thread_.reset();
|
||||
}
|
||||
storage_write_shader_queue_.clear();
|
||||
storage_write_pipeline_state_queue_.clear();
|
||||
storage_write_pipeline_queue_.clear();
|
||||
|
||||
if (pipeline_state_storage_file_) {
|
||||
fclose(pipeline_state_storage_file_);
|
||||
pipeline_state_storage_file_ = nullptr;
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
if (pipeline_storage_file_) {
|
||||
fclose(pipeline_storage_file_);
|
||||
pipeline_storage_file_ = nullptr;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
}
|
||||
|
||||
if (shader_storage_file_) {
|
||||
|
@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
|
|||
|
||||
void PipelineCache::EndSubmission() {
|
||||
if (shader_storage_file_flush_needed_ ||
|
||||
pipeline_state_storage_file_flush_needed_) {
|
||||
pipeline_storage_file_flush_needed_) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
if (shader_storage_file_flush_needed_) {
|
||||
storage_write_flush_shaders_ = true;
|
||||
}
|
||||
if (pipeline_state_storage_file_flush_needed_) {
|
||||
storage_write_flush_pipeline_states_ = true;
|
||||
if (pipeline_storage_file_flush_needed_) {
|
||||
storage_write_flush_pipelines_ = true;
|
||||
}
|
||||
}
|
||||
storage_write_request_cond_.notify_one();
|
||||
shader_storage_file_flush_needed_ = false;
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
}
|
||||
if (!creation_threads_.empty()) {
|
||||
CreateQueuedPipelineStatesOnProcessorThread();
|
||||
// Await creation of all queued pipeline state objects.
|
||||
CreateQueuedPipelinesOnProcessorThread();
|
||||
// Await creation of all queued pipelines.
|
||||
bool await_creation_completion_event;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
// Assuming the creation queue is already empty (because the processor
|
||||
// thread also worked on creating the leftover pipeline state objects), so
|
||||
// only check if there are threads with pipeline state objects currently
|
||||
// being created.
|
||||
// thread also worked on creating the leftover pipelines), so only check
|
||||
// if there are threads with pipelines currently being created.
|
||||
await_creation_completion_event = creation_threads_busy_ != 0;
|
||||
if (await_creation_completion_event) {
|
||||
creation_completion_event_->Reset();
|
||||
|
@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() {
|
|||
}
|
||||
}
|
||||
|
||||
bool PipelineCache::IsCreatingPipelineStates() {
|
||||
bool PipelineCache::IsCreatingPipelines() {
|
||||
if (creation_threads_.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
uint32_t dword_count) {
|
||||
// Hash the input memory and lookup the shader.
|
||||
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
|
||||
auto it = shader_map_.find(data_hash);
|
||||
if (it != shader_map_.end()) {
|
||||
auto it = shaders_.find(data_hash);
|
||||
if (it != shaders_.end()) {
|
||||
// Shader has been previously loaded.
|
||||
return it->second;
|
||||
}
|
||||
|
@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
// again.
|
||||
D3D12Shader* shader =
|
||||
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
|
||||
shader_map_.insert({data_hash, shader});
|
||||
shaders_.emplace(data_hash, shader);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
|
||||
const {
|
||||
// If the values this functions returns are changed, INVALIDATE THE SHADER
|
||||
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The
|
||||
// exception is when the function originally returned "unsupported", but
|
||||
// started to return a valid value (in this case the shader wouldn't be cached
|
||||
// in the first place). Otherwise games will not be able to locate shaders for
|
||||
// draws for which the host vertex shader type has changed!
|
||||
// STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
|
||||
// is when the function originally returned "unsupported", but started to
|
||||
// return a valid value (in this case the shader wouldn't be cached in the
|
||||
// first place). Otherwise games will not be able to locate shaders for draws
|
||||
// for which the host vertex shader type has changed!
|
||||
const auto& regs = register_file_;
|
||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
|
||||
|
@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
|
|||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_state_handle_out,
|
||||
ID3D12RootSignature** root_signature_out) {
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
assert_not_null(pipeline_state_handle_out);
|
||||
assert_not_null(pipeline_handle_out);
|
||||
assert_not_null(root_signature_out);
|
||||
|
||||
PipelineRuntimeDescription runtime_description;
|
||||
|
@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
|
|||
}
|
||||
PipelineDescription& description = runtime_description.description;
|
||||
|
||||
if (current_pipeline_state_ != nullptr &&
|
||||
!std::memcmp(¤t_pipeline_state_->description.description,
|
||||
&description, sizeof(description))) {
|
||||
*pipeline_state_handle_out = current_pipeline_state_;
|
||||
if (current_pipeline_ != nullptr &&
|
||||
!std::memcmp(¤t_pipeline_->description.description, &description,
|
||||
sizeof(description))) {
|
||||
*pipeline_handle_out = current_pipeline_;
|
||||
*root_signature_out = runtime_description.root_signature;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Find an existing pipeline state object in the cache.
|
||||
// Find an existing pipeline in the cache.
|
||||
uint64_t hash = XXH64(&description, sizeof(description), 0);
|
||||
auto found_range = pipeline_states_.equal_range(hash);
|
||||
auto found_range = pipelines_.equal_range(hash);
|
||||
for (auto it = found_range.first; it != found_range.second; ++it) {
|
||||
PipelineState* found_pipeline_state = it->second;
|
||||
if (!std::memcmp(&found_pipeline_state->description.description,
|
||||
&description, sizeof(description))) {
|
||||
current_pipeline_state_ = found_pipeline_state;
|
||||
*pipeline_state_handle_out = found_pipeline_state;
|
||||
*root_signature_out = found_pipeline_state->description.root_signature;
|
||||
Pipeline* found_pipeline = it->second;
|
||||
if (!std::memcmp(&found_pipeline->description.description, &description,
|
||||
sizeof(description))) {
|
||||
current_pipeline_ = found_pipeline;
|
||||
*pipeline_handle_out = found_pipeline;
|
||||
*root_signature_out = found_pipeline->description.root_signature;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
|
|||
return false;
|
||||
}
|
||||
|
||||
PipelineState* new_pipeline_state = new PipelineState;
|
||||
new_pipeline_state->state = nullptr;
|
||||
std::memcpy(&new_pipeline_state->description, &runtime_description,
|
||||
Pipeline* new_pipeline = new Pipeline;
|
||||
new_pipeline->state = nullptr;
|
||||
std::memcpy(&new_pipeline->description, &runtime_description,
|
||||
sizeof(runtime_description));
|
||||
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state));
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
|
||||
pipeline_states_.size());
|
||||
pipelines_.emplace(hash, new_pipeline);
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
|
||||
|
||||
if (!creation_threads_.empty()) {
|
||||
// Submit the pipeline state object for creation to any available thread.
|
||||
// Submit the pipeline for creation to any available thread.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_queue_.push_back(new_pipeline_state);
|
||||
creation_queue_.push_back(new_pipeline);
|
||||
}
|
||||
creation_request_cond_.notify_one();
|
||||
} else {
|
||||
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description);
|
||||
new_pipeline->state = CreateD3D12Pipeline(runtime_description);
|
||||
}
|
||||
|
||||
if (pipeline_state_storage_file_) {
|
||||
if (pipeline_storage_file_) {
|
||||
assert_not_null(storage_write_thread_);
|
||||
pipeline_state_storage_file_flush_needed_ = true;
|
||||
pipeline_storage_file_flush_needed_ = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_pipeline_state_queue_.emplace_back();
|
||||
storage_write_pipeline_queue_.emplace_back();
|
||||
PipelineStoredDescription& stored_description =
|
||||
storage_write_pipeline_state_queue_.back();
|
||||
storage_write_pipeline_queue_.back();
|
||||
stored_description.description_hash = hash;
|
||||
std::memcpy(&stored_description.description, &description,
|
||||
sizeof(description));
|
||||
|
@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
|
|||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
|
||||
current_pipeline_state_ = new_pipeline_state;
|
||||
*pipeline_state_handle_out = new_pipeline_state;
|
||||
current_pipeline_ = new_pipeline;
|
||||
*pipeline_handle_out = new_pipeline;
|
||||
*root_signature_out = runtime_description.root_signature;
|
||||
return true;
|
||||
}
|
||||
|
@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader(
|
|||
std::memcpy(
|
||||
texture_binding_layouts_.data() + new_uid.vector_span_offset,
|
||||
texture_bindings, texture_binding_layout_bytes);
|
||||
texture_binding_layout_map_.insert(
|
||||
{texture_binding_layout_hash, new_uid});
|
||||
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
|
||||
new_uid);
|
||||
}
|
||||
}
|
||||
if (bindless_sampler_count) {
|
||||
|
@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader(
|
|||
vector_bindless_sampler_layout[i] =
|
||||
sampler_bindings[i].bindless_descriptor_index;
|
||||
}
|
||||
bindless_sampler_layout_map_.insert(
|
||||
{bindless_sampler_layout_hash, new_uid});
|
||||
bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
|
||||
new_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
|
||||
};
|
||||
// Like kBlendFactorMap, but with color modes changed to alpha. Some
|
||||
// pipeline state objects aren't created in Prey because a color mode is
|
||||
// used for alpha.
|
||||
// pipelines aren't created in Prey because a color mode is used for alpha.
|
||||
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
|
||||
/* 0 */ PipelineBlendFactor::kZero,
|
||||
/* 1 */ PipelineBlendFactor::kOne,
|
||||
|
@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
return true;
|
||||
}
|
||||
|
||||
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
|
||||
ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
||||
const PipelineRuntimeDescription& runtime_description) {
|
||||
const PipelineDescription& description = runtime_description.description;
|
||||
|
||||
if (runtime_description.pixel_shader != nullptr) {
|
||||
XELOGGPU(
|
||||
"Creating graphics pipeline state with VS {:016X}"
|
||||
", PS {:016X}",
|
||||
XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
} else {
|
||||
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
|
||||
XELOGGPU("Creating graphics pipeline with VS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash());
|
||||
}
|
||||
|
||||
|
@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
|
|||
}
|
||||
}
|
||||
|
||||
// Create the pipeline state object.
|
||||
// Create the D3D12 pipeline state object.
|
||||
auto device =
|
||||
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
|
||||
ID3D12PipelineState* state;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
|
||||
IID_PPV_ARGS(&state)))) {
|
||||
if (runtime_description.pixel_shader != nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create graphics pipeline state with VS {:016X}"
|
||||
", PS {:016X}",
|
||||
XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
} else {
|
||||
XELOGE("Failed to create graphics pipeline state with VS {:016X}",
|
||||
XELOGE("Failed to create graphics pipeline with VS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash());
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
|
|||
ucode_guest_endian.reserve(0xFFFF);
|
||||
|
||||
bool flush_shaders = false;
|
||||
bool flush_pipeline_states = false;
|
||||
bool flush_pipelines = false;
|
||||
|
||||
while (true) {
|
||||
if (flush_shaders) {
|
||||
|
@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
|
|||
assert_not_null(shader_storage_file_);
|
||||
fflush(shader_storage_file_);
|
||||
}
|
||||
if (flush_pipeline_states) {
|
||||
flush_pipeline_states = false;
|
||||
assert_not_null(pipeline_state_storage_file_);
|
||||
fflush(pipeline_state_storage_file_);
|
||||
if (flush_pipelines) {
|
||||
flush_pipelines = false;
|
||||
assert_not_null(pipeline_storage_file_);
|
||||
fflush(pipeline_storage_file_);
|
||||
}
|
||||
|
||||
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
|
||||
PipelineStoredDescription pipeline_description;
|
||||
bool write_pipeline_state = false;
|
||||
bool write_pipeline = false;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
|
||||
if (storage_write_thread_shutdown_) {
|
||||
|
@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
|
|||
storage_write_flush_shaders_ = false;
|
||||
flush_shaders = true;
|
||||
}
|
||||
if (!storage_write_pipeline_state_queue_.empty()) {
|
||||
if (!storage_write_pipeline_queue_.empty()) {
|
||||
std::memcpy(&pipeline_description,
|
||||
&storage_write_pipeline_state_queue_.front(),
|
||||
&storage_write_pipeline_queue_.front(),
|
||||
sizeof(pipeline_description));
|
||||
storage_write_pipeline_state_queue_.pop_front();
|
||||
write_pipeline_state = true;
|
||||
} else if (storage_write_flush_pipeline_states_) {
|
||||
storage_write_flush_pipeline_states_ = false;
|
||||
flush_pipeline_states = true;
|
||||
storage_write_pipeline_queue_.pop_front();
|
||||
write_pipeline = true;
|
||||
} else if (storage_write_flush_pipelines_) {
|
||||
storage_write_flush_pipelines_ = false;
|
||||
flush_pipelines = true;
|
||||
}
|
||||
if (!shader_pair.first && !write_pipeline_state) {
|
||||
if (!shader_pair.first && !write_pipeline) {
|
||||
storage_write_request_cond_.wait(lock);
|
||||
continue;
|
||||
}
|
||||
|
@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
|
|||
}
|
||||
}
|
||||
|
||||
if (write_pipeline_state) {
|
||||
assert_not_null(pipeline_state_storage_file_);
|
||||
if (write_pipeline) {
|
||||
assert_not_null(pipeline_storage_file_);
|
||||
fwrite(&pipeline_description, sizeof(pipeline_description), 1,
|
||||
pipeline_state_storage_file_);
|
||||
pipeline_storage_file_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::CreationThread(size_t thread_index) {
|
||||
while (true) {
|
||||
PipelineState* pipeline_state_to_create = nullptr;
|
||||
Pipeline* pipeline_to_create = nullptr;
|
||||
|
||||
// Check if need to shut down or set the completion event and dequeue the
|
||||
// pipeline state if there is any.
|
||||
// pipeline if there is any.
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(creation_request_lock_);
|
||||
if (thread_index >= creation_threads_shutdown_from_ ||
|
||||
creation_queue_.empty()) {
|
||||
if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
|
||||
// Last pipeline state object in the queue created - signal the event
|
||||
// if requested.
|
||||
// Last pipeline in the queue created - signal the event if requested.
|
||||
creation_completion_set_event_ = false;
|
||||
creation_completion_event_->Set();
|
||||
}
|
||||
|
@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
|
|||
creation_request_cond_.wait(lock);
|
||||
continue;
|
||||
}
|
||||
// Take the pipeline state from the queue and increment the busy thread
|
||||
// count until the pipeline state object is created - other threads must
|
||||
// be able to dequeue requests, but can't set the completion event until
|
||||
// the pipeline state objects are fully created (rather than just started
|
||||
// creating).
|
||||
pipeline_state_to_create = creation_queue_.front();
|
||||
// Take the pipeline from the queue and increment the busy thread count
|
||||
// until the pipeline is created - other threads must be able to dequeue
|
||||
// requests, but can't set the completion event until the pipelines are
|
||||
// fully created (rather than just started creating).
|
||||
pipeline_to_create = creation_queue_.front();
|
||||
creation_queue_.pop_front();
|
||||
++creation_threads_busy_;
|
||||
}
|
||||
|
||||
// Create the D3D12 pipeline state object.
|
||||
pipeline_state_to_create->state =
|
||||
CreateD3D12PipelineState(pipeline_state_to_create->description);
|
||||
pipeline_to_create->state =
|
||||
CreateD3D12Pipeline(pipeline_to_create->description);
|
||||
|
||||
// Pipeline state object created - the thread is not busy anymore, safe to
|
||||
// set the completion event if needed (at the next iteration, or in some
|
||||
// other thread).
|
||||
// Pipeline created - the thread is not busy anymore, safe to set the
|
||||
// completion event if needed (at the next iteration, or in some other
|
||||
// thread).
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
--creation_threads_busy_;
|
||||
|
@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
|
|||
}
|
||||
}
|
||||
|
||||
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
|
||||
void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
|
||||
assert_false(creation_threads_.empty());
|
||||
while (true) {
|
||||
PipelineState* pipeline_state_to_create;
|
||||
Pipeline* pipeline_to_create;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
if (creation_queue_.empty()) {
|
||||
break;
|
||||
}
|
||||
pipeline_state_to_create = creation_queue_.front();
|
||||
pipeline_to_create = creation_queue_.front();
|
||||
creation_queue_.pop_front();
|
||||
}
|
||||
pipeline_state_to_create->state =
|
||||
CreateD3D12PipelineState(pipeline_state_to_create->description);
|
||||
pipeline_to_create->state =
|
||||
CreateD3D12Pipeline(pipeline_to_create->description);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -54,7 +55,7 @@ class PipelineCache {
|
|||
void ShutdownShaderStorage();
|
||||
|
||||
void EndSubmission();
|
||||
bool IsCreatingPipelineStates();
|
||||
bool IsCreatingPipelines();
|
||||
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
|
@ -73,14 +74,12 @@ class PipelineCache {
|
|||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_state_handle_out,
|
||||
ID3D12RootSignature** root_signature_out);
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
|
||||
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return reinterpret_cast<const PipelineState*>(handle)->state;
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if failed to create the pipeline.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
|
||||
return reinterpret_cast<const Pipeline*>(handle)->state;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -237,7 +236,7 @@ class PipelineCache {
|
|||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
PipelineRuntimeDescription& runtime_description_out);
|
||||
|
||||
ID3D12PipelineState* CreateD3D12PipelineState(
|
||||
ID3D12PipelineState* CreateD3D12Pipeline(
|
||||
const PipelineRuntimeDescription& runtime_description);
|
||||
|
||||
D3D12CommandProcessor& command_processor_;
|
||||
|
@ -255,9 +254,9 @@ class PipelineCache {
|
|||
IDxcUtils* dxc_utils_ = nullptr;
|
||||
IDxcCompiler* dxc_compiler_ = nullptr;
|
||||
|
||||
// All loaded shaders mapped by their guest hash key.
|
||||
// Ucode hash -> shader.
|
||||
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
|
||||
shader_map_;
|
||||
shaders_;
|
||||
|
||||
struct LayoutUID {
|
||||
size_t uid;
|
||||
|
@ -285,21 +284,20 @@ class PipelineCache {
|
|||
// Xenos pixel shader provided.
|
||||
std::vector<uint8_t> depth_only_pixel_shader_;
|
||||
|
||||
struct PipelineState {
|
||||
struct Pipeline {
|
||||
// nullptr if creation has failed.
|
||||
ID3D12PipelineState* state;
|
||||
PipelineRuntimeDescription description;
|
||||
};
|
||||
// All previously generated pipeline state objects identified by hash and the
|
||||
// description.
|
||||
std::unordered_multimap<uint64_t, PipelineState*,
|
||||
// All previously generated pipelines identified by hash and the description.
|
||||
std::unordered_multimap<uint64_t, Pipeline*,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
pipeline_states_;
|
||||
pipelines_;
|
||||
|
||||
// Previously used pipeline state object. This matches our current state
|
||||
// settings and allows us to quickly(ish) reuse the pipeline state if no
|
||||
// registers have changed.
|
||||
PipelineState* current_pipeline_state_ = nullptr;
|
||||
// Previously used pipeline. This matches our current state settings and
|
||||
// allows us to quickly(ish) reuse the pipeline if no registers have been
|
||||
// changed.
|
||||
Pipeline* current_pipeline_ = nullptr;
|
||||
|
||||
// Currently open shader storage path.
|
||||
std::filesystem::path shader_storage_root_;
|
||||
|
@ -309,10 +307,9 @@ class PipelineCache {
|
|||
FILE* shader_storage_file_ = nullptr;
|
||||
bool shader_storage_file_flush_needed_ = false;
|
||||
|
||||
// Pipeline state storage output stream, for preload in the next emulator
|
||||
// runs.
|
||||
FILE* pipeline_state_storage_file_ = nullptr;
|
||||
bool pipeline_state_storage_file_flush_needed_ = false;
|
||||
// Pipeline storage output stream, for preload in the next emulator runs.
|
||||
FILE* pipeline_storage_file_ = nullptr;
|
||||
bool pipeline_storage_file_flush_needed_ = false;
|
||||
|
||||
// Thread for asynchronous writing to the storage streams.
|
||||
void StorageWriteThread();
|
||||
|
@ -322,28 +319,27 @@ class PipelineCache {
|
|||
// thread is notified about its change via storage_write_request_cond_.
|
||||
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
|
||||
storage_write_shader_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
|
||||
bool storage_write_flush_shaders_ = false;
|
||||
bool storage_write_flush_pipeline_states_ = false;
|
||||
bool storage_write_flush_pipelines_ = false;
|
||||
bool storage_write_thread_shutdown_ = false;
|
||||
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
|
||||
|
||||
// Pipeline state object creation threads.
|
||||
// Pipeline creation threads.
|
||||
void CreationThread(size_t thread_index);
|
||||
void CreateQueuedPipelineStatesOnProcessorThread();
|
||||
void CreateQueuedPipelinesOnProcessorThread();
|
||||
std::mutex creation_request_lock_;
|
||||
std::condition_variable creation_request_cond_;
|
||||
// Protected with creation_request_lock_, notify_one creation_request_cond_
|
||||
// when set.
|
||||
std::deque<PipelineState*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline state object -
|
||||
// incremented when a pipeline state object is dequeued (the completion event
|
||||
// can't be triggered before this is zero). Protected with
|
||||
// creation_request_lock_.
|
||||
std::deque<Pipeline*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline - incremented when
|
||||
// a pipeline is dequeued (the completion event can't be triggered before this
|
||||
// is zero). Protected with creation_request_lock_.
|
||||
size_t creation_threads_busy_ = 0;
|
||||
// Manual-reset event set when the last queued pipeline state object is
|
||||
// created and there are no more pipeline state objects to create. This is
|
||||
// triggered by the thread creating the last pipeline state object.
|
||||
// Manual-reset event set when the last queued pipeline is created and there
|
||||
// are no more pipelines to create. This is triggered by the thread creating
|
||||
// the last pipeline.
|
||||
std::unique_ptr<xe::threading::Event> creation_completion_event_;
|
||||
// Whether setting the event on completion is queued. Protected with
|
||||
// creation_request_lock_, notify_one creation_request_cond_ when set.
|
||||
|
|
|
@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
// again and again and exit.
|
||||
if (!conversion_needed || converted_index_count == 0) {
|
||||
converted_indices.gpu_address = 0;
|
||||
converted_indices_cache_.insert(
|
||||
std::make_pair(converted_indices.key.value, converted_indices));
|
||||
converted_indices_cache_.emplace(converted_indices.key.value,
|
||||
converted_indices);
|
||||
memory_regions_used_ |= memory_regions_used_bits;
|
||||
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
|
||||
: ConversionResult::kConversionNotNeeded;
|
||||
|
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
|
||||
// Cache and return the indices.
|
||||
converted_indices.gpu_address = gpu_address;
|
||||
converted_indices_cache_.insert(
|
||||
std::make_pair(converted_indices.key.value, converted_indices));
|
||||
converted_indices_cache_.emplace(converted_indices.key.value,
|
||||
converted_indices);
|
||||
memory_regions_used_ |= memory_regions_used_bits;
|
||||
gpu_address_out = gpu_address;
|
||||
index_count_out = converted_index_count;
|
||||
|
|
|
@ -277,19 +277,18 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the EDRAM load/store pipeline state objects.
|
||||
// Create the EDRAM load/store pipelines.
|
||||
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
|
||||
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
|
||||
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.load_shader, mode_info.load_shader_size,
|
||||
edram_load_store_root_signature_);
|
||||
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.store_shader, mode_info.store_shader_size,
|
||||
edram_load_store_root_signature_);
|
||||
if (edram_load_pipelines_[i] == nullptr ||
|
||||
edram_store_pipelines_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the EDRAM load/store pipeline states for mode {}",
|
||||
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
|
||||
i);
|
||||
Shutdown();
|
||||
return false;
|
||||
|
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
}
|
||||
}
|
||||
|
||||
// Create the resolve root signatures and pipeline state objects.
|
||||
// Create the resolve root signatures and pipelines.
|
||||
D3D12_ROOT_PARAMETER resolve_root_parameters[3];
|
||||
|
||||
// Copying root signature.
|
||||
|
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Copying pipeline state objects.
|
||||
// Copying pipelines.
|
||||
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
|
||||
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
|
||||
++i) {
|
||||
|
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
continue;
|
||||
}
|
||||
const auto& resolve_copy_shader = resolve_copy_shaders_[i];
|
||||
ID3D12PipelineState* resolve_copy_pipeline_state =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
ID3D12PipelineState* resolve_copy_pipeline =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
device, resolve_copy_shader.first, resolve_copy_shader.second,
|
||||
resolve_copy_root_signature_);
|
||||
if (resolve_copy_pipeline_state == nullptr) {
|
||||
XELOGE("Failed to create {} resolve copy pipeline state",
|
||||
if (resolve_copy_pipeline == nullptr) {
|
||||
XELOGE("Failed to create {} resolve copy pipeline",
|
||||
resolve_copy_shader_info.debug_name);
|
||||
}
|
||||
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>(
|
||||
resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
|
||||
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
|
||||
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state;
|
||||
resolve_copy_pipelines_[i] = resolve_copy_pipeline;
|
||||
}
|
||||
|
||||
// Clearing pipeline state objects.
|
||||
resolve_clear_32bpp_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
// Clearing pipelines.
|
||||
resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
|
||||
: resolve_clear_32bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_32bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_32bpp_pipeline_state_ == nullptr) {
|
||||
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
|
||||
if (resolve_clear_32bpp_pipeline_ == nullptr) {
|
||||
XELOGE("Failed to create the 32bpp resolve clear pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp");
|
||||
resolve_clear_64bpp_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
|
||||
resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
|
||||
: resolve_clear_64bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_64bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_64bpp_pipeline_state_ == nullptr) {
|
||||
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
|
||||
if (resolve_clear_64bpp_pipeline_ == nullptr) {
|
||||
XELOGE("Failed to create the 64bpp resolve clear pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp");
|
||||
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
|
||||
if (!edram_rov_used_) {
|
||||
assert_false(resolution_scale_2x_);
|
||||
resolve_clear_depth_24_32_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
resolve_clear_depth_24_32_pipeline_ =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
device, resolve_clear_depth_24_32_cs,
|
||||
sizeof(resolve_clear_depth_24_32_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) {
|
||||
if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
|
||||
"state");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_state_->SetName(
|
||||
resolve_clear_64bpp_pipeline_->SetName(
|
||||
L"Resolve Clear 24-bit & 32-bit Depth");
|
||||
}
|
||||
|
||||
|
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
|
|||
|
||||
edram_snapshot_restore_pool_.reset();
|
||||
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
|
||||
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]);
|
||||
for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
|
||||
}
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
|
||||
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
|
||||
|
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
0, sizeof(copy_shader_constants) / sizeof(uint32_t),
|
||||
&copy_shader_constants, 0);
|
||||
}
|
||||
command_processor_.SetComputePipelineState(
|
||||
resolve_copy_pipeline_states_[size_t(copy_shader)]);
|
||||
command_processor_.SetComputePipeline(
|
||||
resolve_copy_pipelines_[size_t(copy_shader)]);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
|
||||
|
||||
|
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
0, sizeof(depth_clear_constants) / sizeof(uint32_t),
|
||||
&depth_clear_constants, 0);
|
||||
command_processor_.SetComputePipelineState(
|
||||
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_
|
||||
: resolve_clear_32bpp_pipeline_state_);
|
||||
command_processor_.SetComputePipeline(
|
||||
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
|
||||
: resolve_clear_32bpp_pipeline_);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(clear_group_count.first,
|
||||
clear_group_count.second, 1);
|
||||
|
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
0, sizeof(color_clear_constants) / sizeof(uint32_t),
|
||||
&color_clear_constants, 0);
|
||||
}
|
||||
command_processor_.SetComputePipelineState(
|
||||
command_processor_.SetComputePipeline(
|
||||
resolve_info.color_edram_info.format_is_64bpp
|
||||
? resolve_clear_64bpp_pipeline_state_
|
||||
: resolve_clear_32bpp_pipeline_state_);
|
||||
? resolve_clear_64bpp_pipeline_
|
||||
: resolve_clear_32bpp_pipeline_);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(clear_group_count.first,
|
||||
clear_group_count.second, 1);
|
||||
|
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
render_target->footprints, nullptr, nullptr,
|
||||
&copy_buffer_size);
|
||||
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
|
||||
render_targets_.insert(std::make_pair(key.value, render_target));
|
||||
render_targets_.emplace(key.value, render_target);
|
||||
COUNT_profile_set("gpu/render_target_cache/render_targets",
|
||||
render_targets_.size());
|
||||
#if 0
|
||||
|
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
|
|||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||
render_target->key.format);
|
||||
command_processor_.SetComputePipelineState(
|
||||
edram_store_pipelines_[size_t(mode)]);
|
||||
command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
|
||||
// 1 group per 80x16 samples.
|
||||
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
|
||||
|
||||
|
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
|
|||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||
render_target->key.format);
|
||||
command_processor_.SetComputePipelineState(
|
||||
edram_load_pipelines_[size_t(mode)]);
|
||||
command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
|
||||
// 1 group per 80x16 samples.
|
||||
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);
|
||||
|
||||
|
|
|
@ -237,14 +237,13 @@ class D3D12CommandProcessor;
|
|||
// get each of the 4 host pixels for each sample.
|
||||
class RenderTargetCache {
|
||||
public:
|
||||
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
||||
// that contradict each other when you use null RTV descriptors - if you set
|
||||
// a valid format in RTVFormats in the pipeline state, it says that null
|
||||
// descriptors can only be used if the format in the pipeline state is
|
||||
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
|
||||
// that the format in the pipeline doesn't match the RTV format. So we have to
|
||||
// make render target bindings consecutive and remap the output indices in
|
||||
// pixel shaders.
|
||||
// Direct3D 12 debug layer is giving errors that contradict each other when
|
||||
// you use null RTV descriptors - if you set a valid format in RTVFormats in
|
||||
// the pipeline state, it says that null descriptors can only be used if the
|
||||
// format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
|
||||
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
|
||||
// state doesn't match the RTV format. So we have to make render target
|
||||
// bindings consecutive and remap the output indices in pixel shaders.
|
||||
struct PipelineRenderTarget {
|
||||
uint32_t guest_render_target;
|
||||
DXGI_FORMAT format;
|
||||
|
@ -537,7 +536,7 @@ class RenderTargetCache {
|
|||
// 16: - EDRAM pitch in tiles.
|
||||
uint32_t base_samples_2x_depth_pitch;
|
||||
};
|
||||
// EDRAM pipeline states for the RTV/DSV path.
|
||||
// EDRAM pipelines for the RTV/DSV path.
|
||||
static const EdramLoadStoreModeInfo
|
||||
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
|
||||
ID3D12PipelineState*
|
||||
|
@ -546,20 +545,20 @@ class RenderTargetCache {
|
|||
ID3D12PipelineState*
|
||||
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
|
||||
|
||||
// Resolve root signatures and pipeline state objects.
|
||||
// Resolve root signatures and pipelines.
|
||||
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
|
||||
static const std::pair<const uint8_t*, size_t>
|
||||
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
|
||||
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t(
|
||||
ID3D12PipelineState* resolve_copy_pipelines_[size_t(
|
||||
draw_util::ResolveCopyShaderIndex::kCount)] = {};
|
||||
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
|
||||
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
|
||||
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
|
||||
// Clearing 64bpp color.
|
||||
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
|
||||
// Clearing float depth without ROV, both the float24 and the host float32
|
||||
// versions.
|
||||
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
|
||||
|
||||
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
|
||||
// Nvidia Maxwell 1st generation and older.
|
||||
|
|
|
@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the loading pipeline state objects.
|
||||
// Create the loading pipelines.
|
||||
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
|
||||
const LoadModeInfo& mode_info = load_mode_info_[i];
|
||||
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.shader, mode_info.shader_size, load_root_signature_);
|
||||
if (load_pipeline_states_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the texture loading pipeline state object for mode "
|
||||
"{}",
|
||||
i);
|
||||
if (load_pipelines_[i] == nullptr) {
|
||||
XELOGE("Failed to create the texture loading pipeline for mode {}", i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
|
||||
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.shader_2x, mode_info.shader_2x_size,
|
||||
load_root_signature_);
|
||||
if (load_pipeline_states_2x_[i] == nullptr) {
|
||||
if (load_pipelines_2x_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the 2x-scaled texture loading pipeline state "
|
||||
"for mode {}",
|
||||
"Failed to create the 2x-scaled texture loading pipeline for mode "
|
||||
"{}",
|
||||
i);
|
||||
Shutdown();
|
||||
return false;
|
||||
|
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
|
|||
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
|
||||
|
||||
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
|
||||
}
|
||||
ui::d3d12::util::ReleaseAndNull(load_root_signature_);
|
||||
|
||||
|
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
if (IsResolutionScale2X() && key.tiled) {
|
||||
LoadMode load_mode = GetLoadMode(key);
|
||||
if (load_mode != LoadMode::kUnknown &&
|
||||
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) {
|
||||
load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
|
||||
uint32_t base_size = 0, mip_size = 0;
|
||||
texture_util::GetTextureTotalSize(
|
||||
key.dimension, key.width, key.height, key.depth, key.format,
|
||||
|
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
}
|
||||
texture->base_watch_handle = nullptr;
|
||||
texture->mip_watch_handle = nullptr;
|
||||
textures_.insert(std::make_pair(map_key, texture));
|
||||
textures_.emplace(map_key, texture);
|
||||
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
|
||||
textures_total_size_ += texture->resource_size;
|
||||
COUNT_profile_set("gpu/texture_cache/total_size_mb",
|
||||
|
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
return false;
|
||||
}
|
||||
bool scaled_resolve = texture->key.scaled_resolve ? true : false;
|
||||
ID3D12PipelineState* pipeline_state =
|
||||
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)]
|
||||
: load_pipeline_states_[uint32_t(load_mode)];
|
||||
if (pipeline_state == nullptr) {
|
||||
ID3D12PipelineState* pipeline = scaled_resolve
|
||||
? load_pipelines_2x_[uint32_t(load_mode)]
|
||||
: load_pipelines_[uint32_t(load_mode)];
|
||||
if (pipeline == nullptr) {
|
||||
return false;
|
||||
}
|
||||
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
|
||||
|
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
load_mode_info.srv_bpe_log2);
|
||||
}
|
||||
}
|
||||
command_processor_.SetComputePipelineState(pipeline_state);
|
||||
command_processor_.SetComputePipeline(pipeline);
|
||||
command_list.D3DSetComputeRootSignature(load_root_signature_);
|
||||
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
|
||||
|
||||
|
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
|
|||
}
|
||||
device->CreateShaderResourceView(
|
||||
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
|
||||
texture.srv_descriptors.insert({descriptor_key, descriptor_index});
|
||||
texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
|
||||
return descriptor_index;
|
||||
}
|
||||
|
||||
|
|
|
@ -550,9 +550,9 @@ class TextureCache {
|
|||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
ID3D12RootSignature* load_root_signature_ = nullptr;
|
||||
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {};
|
||||
// Load pipeline state objects for 2x-scaled resolved targets.
|
||||
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {};
|
||||
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
|
||||
// Load pipelines for 2x-scaled resolved targets.
|
||||
ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
|
||||
|
||||
std::unordered_multimap<uint64_t, Texture*> textures_;
|
||||
uint64_t textures_total_size_ = 0;
|
||||
|
|
|
@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
|
||||
DxbcSrc::LU(~uint32_t(3)));
|
||||
}
|
||||
// Add the word offset from the instruction, plus the offset of the first
|
||||
// needed word within the element.
|
||||
// Add the word offset from the instruction (signed), plus the offset of the
|
||||
// first needed word within the element.
|
||||
uint32_t first_word_index;
|
||||
xe::bit_scan_forward(needed_words, &first_word_index);
|
||||
int32_t first_word_buffer_offset =
|
||||
|
|
|
@ -65,17 +65,17 @@ enum class InstructionStorageTarget {
|
|||
// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
|
||||
// skipped components and zeros, which cannot be encoded, and therefore it will
|
||||
// not).
|
||||
constexpr uint32_t GetInstructionStorageTargetUsedComponents(
|
||||
constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
|
||||
InstructionStorageTarget target) {
|
||||
switch (target) {
|
||||
case InstructionStorageTarget::kNone:
|
||||
return 0b0000;
|
||||
return 0;
|
||||
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
|
||||
return 0b0111;
|
||||
return 3;
|
||||
case InstructionStorageTarget::kDepth:
|
||||
return 0b0001;
|
||||
return 1;
|
||||
default:
|
||||
return 0b1111;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,8 +136,9 @@ struct InstructionResult {
|
|||
// Returns the write mask containing only components actually present in the
|
||||
// target.
|
||||
uint32_t GetUsedWriteMask() const {
|
||||
return original_write_mask &
|
||||
GetInstructionStorageTargetUsedComponents(storage_target);
|
||||
uint32_t target_component_count =
|
||||
GetInstructionStorageTargetUsedComponentCount(storage_target);
|
||||
return original_write_mask & ((1 << target_component_count) - 1);
|
||||
}
|
||||
// True if the components are in their 'standard' swizzle arrangement (xyzw).
|
||||
bool IsStandardSwizzle() const {
|
||||
|
@ -161,6 +162,28 @@ struct InstructionResult {
|
|||
}
|
||||
return used_components;
|
||||
}
|
||||
// Returns which components of the used write mask are constant, and what
|
||||
// values they have.
|
||||
uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const {
|
||||
uint32_t constant_components = 0;
|
||||
uint32_t constant_values = 0;
|
||||
uint32_t used_write_mask = GetUsedWriteMask();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(used_write_mask & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
SwizzleSource component = components[i];
|
||||
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
|
||||
continue;
|
||||
}
|
||||
constant_components |= 1 << i;
|
||||
if (component == SwizzleSource::k1) {
|
||||
constant_values |= 1 << i;
|
||||
}
|
||||
}
|
||||
constant_values_out = constant_values;
|
||||
return constant_components;
|
||||
}
|
||||
};
|
||||
|
||||
enum class InstructionStorageSource {
|
||||
|
|
|
@ -25,6 +25,9 @@ namespace gpu {
|
|||
// system page size granularity.
|
||||
class SharedMemory {
|
||||
public:
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
||||
|
||||
virtual ~SharedMemory();
|
||||
// Call in the implementation-specific ClearCache.
|
||||
virtual void ClearCache();
|
||||
|
@ -98,9 +101,6 @@ class SharedMemory {
|
|||
// destructor.
|
||||
void ShutdownCommon();
|
||||
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
||||
|
||||
// Sparse allocations are 4 MB, so not too many of them are allocated, but
|
||||
// also not to waste too much memory for padding (with 16 MB there's too
|
||||
// much).
|
||||
|
|
|
@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the pipeline states.
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {};
|
||||
pipeline_state_desc.pRootSignature = root_signature_;
|
||||
pipeline_state_desc.VS.pShaderBytecode = immediate_vs;
|
||||
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs);
|
||||
pipeline_state_desc.PS.pShaderBytecode = immediate_ps;
|
||||
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps);
|
||||
// Create the pipelines.
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
|
||||
pipeline_desc.pRootSignature = root_signature_;
|
||||
pipeline_desc.VS.pShaderBytecode = immediate_vs;
|
||||
pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
|
||||
pipeline_desc.PS.pShaderBytecode = immediate_ps;
|
||||
pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
|
||||
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
|
||||
pipeline_state_desc.BlendState.RenderTarget[0];
|
||||
pipeline_desc.BlendState.RenderTarget[0];
|
||||
pipeline_blend_desc.BlendEnable = TRUE;
|
||||
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
||||
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
|
||||
|
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
|
||||
D3D12_COLOR_WRITE_ENABLE_GREEN |
|
||||
D3D12_COLOR_WRITE_ENABLE_BLUE;
|
||||
pipeline_state_desc.SampleMask = UINT_MAX;
|
||||
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE;
|
||||
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE;
|
||||
pipeline_desc.SampleMask = UINT_MAX;
|
||||
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
|
||||
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
|
||||
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
|
||||
pipeline_input_elements[0].SemanticName = "POSITION";
|
||||
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
|
||||
|
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
pipeline_input_elements[2].AlignedByteOffset =
|
||||
offsetof(ImmediateVertex, color);
|
||||
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
|
||||
pipeline_state_desc.InputLayout.NumElements =
|
||||
pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
|
||||
pipeline_desc.InputLayout.NumElements =
|
||||
UINT(xe::countof(pipeline_input_elements));
|
||||
pipeline_state_desc.PrimitiveTopologyType =
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
pipeline_state_desc.NumRenderTargets = 1;
|
||||
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
|
||||
pipeline_state_desc.SampleDesc.Count = 1;
|
||||
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
pipeline_desc.NumRenderTargets = 1;
|
||||
pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
|
||||
pipeline_desc.SampleDesc.Count = 1;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) {
|
||||
&pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the Direct3D 12 immediate drawer triangle pipeline "
|
||||
"state");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
pipeline_state_desc.PrimitiveTopologyType =
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
|
||||
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) {
|
||||
&pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the Direct3D 12 immediate drawer line pipeline "
|
||||
"state");
|
||||
|
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
|
|||
|
||||
util::ReleaseAndNull(sampler_heap_);
|
||||
|
||||
util::ReleaseAndNull(pipeline_state_line_);
|
||||
util::ReleaseAndNull(pipeline_state_triangle_);
|
||||
util::ReleaseAndNull(pipeline_line_);
|
||||
util::ReleaseAndNull(pipeline_triangle_);
|
||||
|
||||
util::ReleaseAndNull(root_signature_);
|
||||
}
|
||||
|
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
|
|||
uint32_t(sampler_index)));
|
||||
}
|
||||
|
||||
// Set the primitive type and the pipeline state for it.
|
||||
// Set the primitive type and the pipeline for it.
|
||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||
ID3D12PipelineState* pipeline_state;
|
||||
ID3D12PipelineState* pipeline;
|
||||
switch (draw.primitive_type) {
|
||||
case ImmediatePrimitiveType::kLines:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
pipeline_state = pipeline_state_line_;
|
||||
pipeline = pipeline_line_;
|
||||
break;
|
||||
case ImmediatePrimitiveType::kTriangles:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
pipeline_state = pipeline_state_triangle_;
|
||||
pipeline = pipeline_triangle_;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(draw.primitive_type);
|
||||
|
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
|
|||
if (current_primitive_topology_ != primitive_topology) {
|
||||
current_primitive_topology_ = primitive_topology;
|
||||
current_command_list_->IASetPrimitiveTopology(primitive_topology);
|
||||
current_command_list_->SetPipelineState(pipeline_state);
|
||||
current_command_list_->SetPipelineState(pipeline);
|
||||
}
|
||||
|
||||
// Draw.
|
||||
|
|
|
@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
|
|||
kCount
|
||||
};
|
||||
|
||||
ID3D12PipelineState* pipeline_state_triangle_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_state_line_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_triangle_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_line_ = nullptr;
|
||||
|
||||
ID3D12DescriptorHeap* sampler_heap_ = nullptr;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;
|
||||
|
|
|
@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
|
|||
return root_signature;
|
||||
}
|
||||
|
||||
ID3D12PipelineState* CreateComputePipelineState(
|
||||
ID3D12PipelineState* CreateComputePipeline(
|
||||
ID3D12Device* device, const void* shader, size_t shader_size,
|
||||
ID3D12RootSignature* root_signature) {
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC desc;
|
||||
|
|
|
@ -39,8 +39,9 @@ inline bool ReleaseAndNull(T& object) {
|
|||
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
|
||||
const D3D12_ROOT_SIGNATURE_DESC& desc);
|
||||
|
||||
ID3D12PipelineState* CreateComputePipelineState(
|
||||
ID3D12Device* device, const void* shader, size_t shader_size,
|
||||
ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
|
||||
const void* shader,
|
||||
size_t shader_size,
|
||||
ID3D12RootSignature* root_signature);
|
||||
|
||||
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {
|
||||
|
|
|
@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() {
|
|||
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
|
||||
uint64_t submission_index, size_t size, size_t alignment,
|
||||
size_t& offset_out) {
|
||||
assert_not_zero(alignment);
|
||||
alignment = std::max(alignment, size_t(1));
|
||||
assert_true(xe::is_pow2(alignment));
|
||||
size = xe::align(size, alignment);
|
||||
assert_true(size <= page_size_);
|
||||
|
@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
|
|||
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial(
|
||||
uint64_t submission_index, size_t size, size_t alignment,
|
||||
size_t& offset_out, size_t& size_out) {
|
||||
assert_not_zero(alignment);
|
||||
alignment = std::max(alignment, size_t(1));
|
||||
assert_true(xe::is_pow2(alignment));
|
||||
size = xe::align(size, alignment);
|
||||
size = std::min(size, page_size_);
|
||||
|
|
Loading…
Reference in New Issue