From 26cf7173945bff4226c50dfd1eb4cc1b04dfcacd Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 May 2022 18:28:32 +0300 Subject: [PATCH 1/5] [GPU] Make TextureCache constructors explicit --- src/xenia/gpu/d3d12/d3d12_texture_cache.h | 22 +++++++++++----------- src/xenia/gpu/texture_cache.h | 9 +++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index 0f1164922..a398cd941 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -268,9 +268,9 @@ class D3D12TextureCache final : public TextureCache { class D3D12Texture final : public Texture { public: - D3D12Texture(D3D12TextureCache& texture_cache, const TextureKey& key, - ID3D12Resource* resource, - D3D12_RESOURCE_STATES resource_state); + explicit D3D12Texture(D3D12TextureCache& texture_cache, + const TextureKey& key, ID3D12Resource* resource, + D3D12_RESOURCE_STATES resource_state); ~D3D12Texture(); ID3D12Resource* resource() const { return resource_.Get(); } @@ -346,8 +346,8 @@ class D3D12TextureCache final : public TextureCache { class ScaledResolveVirtualBuffer { public: - ScaledResolveVirtualBuffer(ID3D12Resource* resource, - D3D12_RESOURCE_STATES resource_state) + explicit ScaledResolveVirtualBuffer(ID3D12Resource* resource, + D3D12_RESOURCE_STATES resource_state) : resource_(resource), resource_state_(resource_state) {} ID3D12Resource* resource() const { return resource_.Get(); } D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) { @@ -373,12 +373,12 @@ class D3D12TextureCache final : public TextureCache { bool uav_barrier_pending_ = false; }; - D3D12TextureCache(const RegisterFile& register_file, - D3D12SharedMemory& shared_memory, - uint32_t draw_resolution_scale_x, - uint32_t draw_resolution_scale_y, - D3D12CommandProcessor& command_processor, - bool bindless_resources_used); + explicit D3D12TextureCache(const RegisterFile& register_file, + D3D12SharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, + uint32_t draw_resolution_scale_y, + D3D12CommandProcessor& command_processor, + bool bindless_resources_used); bool Initialize(); diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h index 475cdfdfc..1802eaaca 100644 --- a/src/xenia/gpu/texture_cache.h +++ b/src/xenia/gpu/texture_cache.h @@ -248,7 +248,7 @@ class TextureCache { void LogAction(const char* action) const; protected: - Texture(TextureCache& texture_cache, const TextureKey& key); + explicit Texture(TextureCache& texture_cache, const TextureKey& key); void SetHostMemoryUsage(uint64_t new_host_memory_usage) { texture_cache_.UpdateTexturesTotalHostMemoryUsage(new_host_memory_usage, @@ -420,9 +420,10 @@ class TextureCache { } }; - TextureCache(const RegisterFile& register_file, SharedMemory& shared_memory, - uint32_t draw_resolution_scale_x, - uint32_t draw_resolution_scale_y); + explicit TextureCache(const RegisterFile& register_file, + SharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, + uint32_t draw_resolution_scale_y); const RegisterFile& register_file() const { return register_file_; } SharedMemory& shared_memory() const { return shared_memory_; } From d6a905695238a1c60cc3b85f92ea58bcd6788e4f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 May 2022 18:41:15 +0300 Subject: [PATCH 2/5] [D3D12] D3D12Texture::SRVDescriptorKey structure --- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 4 ++- src/xenia/gpu/d3d12/d3d12_texture_cache.h | 30 +++++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index ce38c1cba..72233c79f 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1955,7 +1955,9 @@ void D3D12TextureCache::UpdateTextureBindingsImpl( uint32_t D3D12TextureCache::FindOrCreateTextureDescriptor( D3D12Texture& texture, bool is_signed, uint32_t host_swizzle) { - uint32_t descriptor_key = uint32_t(is_signed) | (host_swizzle << 1); + D3D12Texture::SRVDescriptorKey descriptor_key; + descriptor_key.is_signed = uint32_t(is_signed); + descriptor_key.host_swizzle = host_swizzle; // Try to find an existing descriptor. uint32_t existing_descriptor_index = diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index a398cd941..2dd418fbd 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -11,6 +11,7 @@ #define XENIA_GPU_D3D12_D3D12_TEXTURE_CACHE_H_ #include +#include #include #include #include @@ -268,6 +269,28 @@ class D3D12TextureCache final : public TextureCache { class D3D12Texture final : public Texture { public: + union SRVDescriptorKey { + uint32_t key; + struct { + uint32_t is_signed : 1; + uint32_t host_swizzle : 12; + }; + + SRVDescriptorKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const SRVDescriptorKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const SRVDescriptorKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const SRVDescriptorKey& other_key) const { + return !(*this == other_key); + } + }; + explicit D3D12Texture(D3D12TextureCache& texture_cache, const TextureKey& key, ID3D12Resource* resource, D3D12_RESOURCE_STATES resource_state); @@ -281,12 +304,12 @@ class D3D12TextureCache final : public TextureCache { return old_state; } - uint32_t GetSRVDescriptorIndex(uint32_t descriptor_key) const { + uint32_t GetSRVDescriptorIndex(SRVDescriptorKey descriptor_key) const { auto it = srv_descriptors_.find(descriptor_key); return it != srv_descriptors_.cend() ? it->second : UINT32_MAX; } - void AddSRVDescriptorIndex(uint32_t descriptor_key, + void AddSRVDescriptorIndex(SRVDescriptorKey descriptor_key, uint32_t descriptor_index) { srv_descriptors_.emplace(descriptor_key, descriptor_index); } @@ -299,7 +322,8 @@ class D3D12TextureCache final : public TextureCache { // copying to the shader-visible heap (much faster than recreating, which, // according to profiling, was often a bottleneck in many games). // For bindless - indices in the global shader-visible descriptor heap. - std::unordered_map srv_descriptors_; + std::unordered_map + srv_descriptors_; }; static constexpr uint32_t kSRVDescriptorCachePageSize = 65536; From 60052fb4fc7d76fc9fbfc084ef6e21d4bbb8f473 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 May 2022 22:18:21 +0300 Subject: [PATCH 3/5] [Vulkan] Don't require imageViewFormatSwizzle in the immediate drawer --- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index a73e42894..162474762 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -866,6 +866,9 @@ bool VulkanImmediateDrawer::CreateTextureResource( size_t& pending_upload_index_out) { const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); VkDevice device = provider_.device(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider_.device_portability_subset_features(); // Create the image and the descriptor. @@ -907,9 +910,13 @@ bool VulkanImmediateDrawer::CreateTextureResource( image_view_create_info.image = image; image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; image_view_create_info.format = VK_FORMAT_R8G8B8A8_UNORM; - // data == nullptr is a special case for (1, 1, 1, 1). + // data == nullptr is a special case for (1, 1, 1, 1), though the image will + // be cleared to (1, 1, 1, 1) anyway, just a micro-optimization. VkComponentSwizzle swizzle = - data ? VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_ONE; + (data || (device_portability_subset_features && + !device_portability_subset_features->imageViewFormatSwizzle)) + ? VK_COMPONENT_SWIZZLE_IDENTITY + : VK_COMPONENT_SWIZZLE_ONE; image_view_create_info.components.r = swizzle; image_view_create_info.components.g = swizzle; image_view_create_info.components.b = swizzle; From f9b3b90a686d699e876d3c84071216c5f4db0a74 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 May 2022 22:30:06 +0300 Subject: [PATCH 4/5] [D3D12] Subsystem management order cleanup --- .../gpu/d3d12/d3d12_command_processor.cc | 38 +++++++++---------- src/xenia/gpu/d3d12/d3d12_command_processor.h | 4 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index b869513bc..9e549d19b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -862,6 +862,13 @@ bool D3D12CommandProcessor::SetupContext() { draw_resolution_scale_x, draw_resolution_scale_y); } + shared_memory_ = + std::make_unique(*this, *memory_, trace_writer_); + if (!shared_memory_->Initialize()) { + XELOGE("Failed to initialize shared memory"); + return false; + } + // Initialize the render target cache before configuring binding - need to // know if using rasterizer-ordered views for the bindless root signature. render_target_cache_ = std::make_unique( @@ -1144,13 +1151,6 @@ bool D3D12CommandProcessor::SetupContext() { } } - shared_memory_ = - std::make_unique(*this, *memory_, trace_writer_); - if (!shared_memory_->Initialize()) { - XELOGE("Failed to initialize shared memory"); - return false; - } - primitive_processor_ = std::make_unique( *register_file_, *memory_, trace_writer_, *shared_memory_, *this); if (!primitive_processor_->Initialize()) { @@ -1615,13 +1615,11 @@ void D3D12CommandProcessor::ShutdownContext() { gamma_ramp_upload_buffer_.Reset(); gamma_ramp_buffer_.Reset(); - pipeline_cache_.reset(); - texture_cache_.reset(); - primitive_processor_.reset(); + pipeline_cache_.reset(); - shared_memory_.reset(); + primitive_processor_.reset(); // Shut down binding - bindless descriptors may be owned by subsystems like // the texture cache. @@ -1654,6 +1652,8 @@ void D3D12CommandProcessor::ShutdownContext() { render_target_cache_.reset(); + shared_memory_.reset(); + deferred_command_list_.Reset(); ui::d3d12::util::ReleaseAndNull(command_list_1_); ui::d3d12::util::ReleaseAndNull(command_list_); @@ -2787,10 +2787,10 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) { shared_memory_->CompletedSubmissionUpdated(); - primitive_processor_->CompletedSubmissionUpdated(); - render_target_cache_->CompletedSubmissionUpdated(); + primitive_processor_->CompletedSubmissionUpdated(); + texture_cache_->CompletedSubmissionUpdated(submission_completed_); } @@ -2870,10 +2870,10 @@ bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { } primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; - primitive_processor_->BeginSubmission(); - render_target_cache_->BeginSubmission(); + primitive_processor_->BeginSubmission(); + texture_cache_->BeginSubmission(submission_current_); } @@ -3043,12 +3043,10 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { } constant_buffer_pool_->ClearCache(); - pipeline_cache_->ClearCache(); - - render_target_cache_->ClearCache(); - texture_cache_->ClearCache(); + pipeline_cache_->ClearCache(); + for (auto it : root_signatures_bindful_) { it.second->Release(); } @@ -3056,6 +3054,8 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { primitive_processor_->ClearCache(); + render_target_cache_->ClearCache(); + shared_memory_->ClearCache(); } } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index e9cefb337..0b1e8fa53 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -427,6 +427,8 @@ class D3D12CommandProcessor : public CommandProcessor { // of UpdateBindings time, and that's outside the emulator's control even). bool bindless_resources_used_ = false; + std::unique_ptr shared_memory_; + std::unique_ptr render_target_cache_; std::unique_ptr constant_buffer_pool_; @@ -491,8 +493,6 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12RootSignature* root_signature_bindless_vs_ = nullptr; ID3D12RootSignature* root_signature_bindless_ds_ = nullptr; - std::unique_ptr shared_memory_; - std::unique_ptr primitive_processor_; std::unique_ptr pipeline_cache_; From a65fd4f673e457e279f71cd504019ea8b13e4632 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 16:13:05 +0300 Subject: [PATCH 5/5] [GPU] Shader::IsHostVertexShaderTypeDomain --- src/xenia/gpu/d3d12/pipeline_cache.cc | 62 ++++++++++++++------------ src/xenia/gpu/dxbc_shader_translator.h | 8 ++-- src/xenia/gpu/primitive_processor.h | 2 +- src/xenia/gpu/shader.h | 11 ++++- 4 files changed, 49 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index b06e92e42..1242229e8 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -685,10 +685,10 @@ void PipelineCache::InitializeShaderStorage( pipeline_runtime_description.root_signature = command_processor_.GetRootSignature( vertex_shader, pixel_shader, - DxbcShaderTranslator::Modification( - pipeline_description.vertex_shader_modification) - .vertex.host_vertex_shader_type != - Shader::HostVertexShaderType::kVertex); + Shader::IsHostVertexShaderTypeDomain( + DxbcShaderTranslator::Modification( + pipeline_description.vertex_shader_modification) + .vertex.host_vertex_shader_type)); if (!pipeline_runtime_description.root_signature) { continue; } @@ -2834,30 +2834,7 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( DxbcShaderTranslator::Modification( runtime_description.vertex_shader->modification()) .vertex.host_vertex_shader_type; - if (host_vertex_shader_type == Shader::HostVertexShaderType::kVertex) { - state_desc.VS.pShaderBytecode = - runtime_description.vertex_shader->translated_binary().data(); - state_desc.VS.BytecodeLength = - runtime_description.vertex_shader->translated_binary().size(); - PipelinePrimitiveTopologyType primitive_topology_type = - PipelinePrimitiveTopologyType( - description.primitive_topology_type_or_tessellation_mode); - switch (primitive_topology_type) { - case PipelinePrimitiveTopologyType::kPoint: - state_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; - break; - case PipelinePrimitiveTopologyType::kLine: - state_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; - break; - case PipelinePrimitiveTopologyType::kTriangle: - state_desc.PrimitiveTopologyType = - D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - break; - default: - assert_unhandled_case(primitive_topology_type); - return nullptr; - } - } else { + if (Shader::IsHostVertexShaderTypeDomain(host_vertex_shader_type)) { state_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH; xenos::TessellationMode tessellation_mode = xenos::TessellationMode( description.primitive_topology_type_or_tessellation_mode); @@ -2929,6 +2906,35 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( runtime_description.vertex_shader->translated_binary().data(); state_desc.DS.BytecodeLength = runtime_description.vertex_shader->translated_binary().size(); + } else { + assert_true(host_vertex_shader_type == + Shader::HostVertexShaderType::kVertex); + if (host_vertex_shader_type != Shader::HostVertexShaderType::kVertex) { + // Fallback vertex shaders are not needed on Direct3D 12. + return nullptr; + } + state_desc.VS.pShaderBytecode = + runtime_description.vertex_shader->translated_binary().data(); + state_desc.VS.BytecodeLength = + runtime_description.vertex_shader->translated_binary().size(); + PipelinePrimitiveTopologyType primitive_topology_type = + PipelinePrimitiveTopologyType( + description.primitive_topology_type_or_tessellation_mode); + switch (primitive_topology_type) { + case PipelinePrimitiveTopologyType::kPoint: + state_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + break; + case PipelinePrimitiveTopologyType::kLine: + state_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + break; + case PipelinePrimitiveTopologyType::kTriangle: + state_desc.PrimitiveTopologyType = + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + default: + assert_unhandled_case(primitive_topology_type); + return nullptr; + } } // Pixel shader. diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index e8009c210..9e029cb59 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -646,13 +646,13 @@ class DxbcShaderTranslator : public ShaderTranslator { bool IsDxbcVertexShader() const { return is_vertex_shader() && - GetDxbcShaderModification().vertex.host_vertex_shader_type == - Shader::HostVertexShaderType::kVertex; + !Shader::IsHostVertexShaderTypeDomain( + GetDxbcShaderModification().vertex.host_vertex_shader_type); } bool IsDxbcDomainShader() const { return is_vertex_shader() && - GetDxbcShaderModification().vertex.host_vertex_shader_type != - Shader::HostVertexShaderType::kVertex; + Shader::IsHostVertexShaderTypeDomain( + GetDxbcShaderModification().vertex.host_vertex_shader_type); } // Whether to use switch-case rather than if (pc >= label) for control flow. diff --git a/src/xenia/gpu/primitive_processor.h b/src/xenia/gpu/primitive_processor.h index b0a6713be..cfbec0ae9 100644 --- a/src/xenia/gpu/primitive_processor.h +++ b/src/xenia/gpu/primitive_processor.h @@ -145,7 +145,7 @@ class PrimitiveProcessor { // only valid for index_buffer_type kHostConverted and kHostBuiltin. size_t host_index_buffer_handle; bool IsTessellated() const { - return host_vertex_shader_type != Shader::HostVertexShaderType::kVertex; + return Shader::IsHostVertexShaderTypeDomain(host_vertex_shader_type); } }; diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 99ad84b8a..31f9de372 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -659,16 +659,25 @@ class Shader { // packed. This is : uint32_t for simplicity of packing in bit fields. enum class HostVertexShaderType : uint32_t { kVertex, - kLineDomainCPIndexed, + + kDomainStart, + kLineDomainCPIndexed = kDomainStart, kLineDomainPatchIndexed, kTriangleDomainCPIndexed, kTriangleDomainPatchIndexed, kQuadDomainCPIndexed, kQuadDomainPatchIndexed, + kDomainEnd, }; // For packing HostVertexShaderType in bit fields. static constexpr uint32_t kHostVertexShaderTypeBitCount = 3; + static constexpr bool IsHostVertexShaderTypeDomain( + HostVertexShaderType host_vertex_shader_type) { + return host_vertex_shader_type >= HostVertexShaderType::kDomainStart && + host_vertex_shader_type < HostVertexShaderType::kDomainEnd; + } + struct Error { bool is_fatal = false; std::string message;