diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e39418cc8..b086f325b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1000,6 +1000,36 @@ bool D3D12CommandProcessor::SetupContext() { parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } + // Shared memory SRV and UAV. + D3D12_DESCRIPTOR_RANGE root_shared_memory_view_ranges[2]; + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_SharedMemory]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = + uint32_t(xe::countof(root_shared_memory_view_ranges)); + parameter.DescriptorTable.pDescriptorRanges = + root_shared_memory_view_ranges; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + { + auto& range = root_shared_memory_view_ranges[0]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); + range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); + range.OffsetInDescriptorsFromTableStart = 0; + } + { + auto& range = root_shared_memory_view_ranges[1]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 1; + } + } // Sampler heap. D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range; { @@ -1019,7 +1049,7 @@ bool D3D12CommandProcessor::SetupContext() { root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0; } // View heap. - D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6]; + D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[4]; { auto& parameter = root_parameters_bindless[kRootParameter_Bindless_ViewHeap]; @@ -1028,34 +1058,6 @@ bool D3D12CommandProcessor::SetupContext() { parameter.DescriptorTable.NumDescriptorRanges = 0; parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // Shared memory SRV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); - range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawSRV); - } - // Shared memory UAV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawUAV); - } // EDRAM. if (render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock) { @@ -1418,6 +1420,20 @@ bool D3D12CommandProcessor::SetupContext() { if (bindless_resources_used_) { // Create the system bindless descriptors once all resources are // initialized. + // kNullRawSRV. + ui::d3d12::util::CreateBufferRawSRV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawSRV)), + nullptr, 0); + // kNullRawUAV. + ui::d3d12::util::CreateBufferRawUAV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawUAV)), + nullptr, 0); // kNullTexture2DArray. D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc; null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -2272,7 +2288,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, used_texture_mask, normalized_depth_control, normalized_color_mask); // Update constant buffers, descriptors and root parameters. - if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { + if (!UpdateBindings(vertex_shader, pixel_shader, root_signature, + memexport_used)) { return false; } // Must not call anything that can change the descriptor heap from now on! @@ -2890,6 +2907,7 @@ bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { cbuffer_binding_float_pixel_.up_to_date = false; cbuffer_binding_bool_loop_.up_to_date = false; cbuffer_binding_fetch_.up_to_date = false; + current_shared_memory_binding_is_uav_.reset(); if (bindless_resources_used_) { cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; @@ -3649,9 +3667,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( cbuffer_binding_system_.up_to_date &= !dirty; } -bool D3D12CommandProcessor::UpdateBindings( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature) { +bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader, + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav) { const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); const RegisterFile& regs = *register_file_; @@ -3688,6 +3707,9 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t root_parameter_bool_loop_constants = bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants : kRootParameter_Bindful_BoolLoopConstants; + uint32_t root_parameter_shared_memory_and_bindful_edram = + bindless_resources_used_ ? kRootParameter_Bindless_SharedMemory + : kRootParameter_Bindful_SharedMemoryAndEdram; // // Update root constant buffers that are common for bindful and bindless. @@ -3852,6 +3874,13 @@ bool D3D12CommandProcessor::UpdateBindings( // Update descriptors. // + if (!current_shared_memory_binding_is_uav_.has_value() || + current_shared_memory_binding_is_uav_.value() != shared_memory_is_uav) { + current_shared_memory_binding_is_uav_ = shared_memory_is_uav; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_shared_memory_and_bindful_edram); + } + // Get textures and samplers used by the vertex shader, check if the last used // samplers are compatible and update them. size_t texture_layout_uid_vertex = @@ -4180,12 +4209,14 @@ bool D3D12CommandProcessor::UpdateBindings( if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } - // All the constants + shared memory SRV and UAV + textures. + // Shared memory SRV and null UAV + null SRV and shared memory UAV + + // textures. size_t view_count_full_update = - 2 + texture_count_vertex + texture_count_pixel; + 4 + texture_count_vertex + texture_count_pixel; if (edram_rov_used) { - // + EDRAM UAV. - ++view_count_full_update; + // + EDRAM UAV in two tables (with the shared memory SRV and with the + // shared memory UAV). + view_count_full_update += 2; } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; @@ -4230,10 +4261,25 @@ bool D3D12CommandProcessor::UpdateBindings( bindful_textures_written_pixel_ = false; // If updating fully, write the shared memory SRV and UAV descriptors and, // if needed, the EDRAM descriptor. - gpu_handle_shared_memory_and_edram_ = view_gpu_handle; + // SRV + null UAV + EDRAM. + gpu_handle_shared_memory_srv_and_edram_ = view_gpu_handle; shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; + ui::d3d12::util::CreateBufferRawUAV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + if (edram_rov_used) { + render_target_cache_->WriteEdramUintPow2UAVDescriptor(view_cpu_handle, + 2); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + // Null SRV + UAV + EDRAM. + gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle; + ui::d3d12::util::CreateBufferRawSRV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; @@ -4372,6 +4418,31 @@ bool D3D12CommandProcessor::UpdateBindings( current_graphics_root_up_to_date_ |= 1u << root_parameter_bool_loop_constants; } + if (!(current_graphics_root_up_to_date_ & + (1u << root_parameter_shared_memory_and_bindful_edram))) { + assert_true(current_shared_memory_binding_is_uav_.has_value()); + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_bindful_edram; + if (bindless_resources_used_) { + gpu_handle_shared_memory_and_bindful_edram = + provider.OffsetViewDescriptor( + view_bindless_heap_gpu_start_, + uint32_t(current_shared_memory_binding_is_uav_.value() + ? SystemBindlessView :: + kNullRawSRVAndSharedMemoryRawUAVStart + : SystemBindlessView :: + kSharedMemoryRawSRVAndNullRawUAVStart)); + } else { + gpu_handle_shared_memory_and_bindful_edram = + current_shared_memory_binding_is_uav_.value() + ? gpu_handle_shared_memory_uav_and_edram_ + : gpu_handle_shared_memory_srv_and_edram_; + } + deferred_command_list_.D3DSetGraphicsRootDescriptorTable( + root_parameter_shared_memory_and_bindful_edram, + gpu_handle_shared_memory_and_bindful_edram); + current_graphics_root_up_to_date_ |= + 1u << root_parameter_shared_memory_and_bindful_edram; + } if (bindless_resources_used_) { if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) { @@ -4405,14 +4476,6 @@ bool D3D12CommandProcessor::UpdateBindings( << kRootParameter_Bindless_ViewHeap; } } else { - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_Bindful_SharedMemoryAndEdram))) { - deferred_command_list_.D3DSetGraphicsRootDescriptorTable( - kRootParameter_Bindful_SharedMemoryAndEdram, - gpu_handle_shared_memory_and_edram_); - current_graphics_root_up_to_date_ |= - 1u << kRootParameter_Bindful_SharedMemoryAndEdram; - } uint32_t extra_index; extra_index = current_graphics_root_bindful_extras_.textures_pixel; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 6162b4683..353e6bd0b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -128,11 +129,19 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t count, ui::d3d12::util::DescriptorCpuGpuHandlePair* handles_out); // These are needed often, so they are always allocated. enum class SystemBindlessView : uint32_t { - kSharedMemoryRawSRV, + // Both may be bound as one root parameter. + kSharedMemoryRawSRVAndNullRawUAVStart, + kSharedMemoryRawSRV = kSharedMemoryRawSRVAndNullRawUAVStart, + kNullRawUAV, + + // Both may be bound as one root parameter. + kNullRawSRVAndSharedMemoryRawUAVStart, + kNullRawSRV = kNullRawSRVAndSharedMemoryRawUAVStart, + kSharedMemoryRawUAV, + kSharedMemoryR32UintSRV, kSharedMemoryR32G32UintSRV, kSharedMemoryR32G32B32A32UintSRV, - kSharedMemoryRawUAV, kSharedMemoryR32UintUAV, kSharedMemoryR32G32UintUAV, kSharedMemoryR32G32B32A32UintUAV, @@ -253,10 +262,10 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindful_SystemConstants, // +2 = 6 in all. // Pretty rarely used and rarely changed - flow control constants. kRootParameter_Bindful_BoolLoopConstants, // +2 = 8 in all. - // Never changed except for when starting a new descriptor heap - shared - // memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT - // UAV. - // SRV/UAV descriptor table. + // Changed only when starting a new descriptor heap or when switching + // between shared memory as SRV and UAV - shared memory byte address buffer + // (as SRV and as UAV, either may be null if not used), and, if ROV is used + // for EDRAM, EDRAM R32_UINT UAV. kRootParameter_Bindful_SharedMemoryAndEdram, // +1 = 9 in all. kRootParameter_Bindful_Count_Base, @@ -280,10 +289,14 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindless_DescriptorIndicesVertex, // +2 = 6 in VS. kRootParameter_Bindless_SystemConstants, // +2 = 8 in all. kRootParameter_Bindless_BoolLoopConstants, // +2 = 10 in all. + // Changed only when switching between shared memory as SRV and UAV - shared + // memory byte address buffer (as SRV and as UAV, either may be null if not + // used). + kRootParameter_Bindless_SharedMemory, // +1 = 11 in all. // Unbounded sampler descriptor table - changed in case of overflow. - kRootParameter_Bindless_SamplerHeap, // +1 = 11 in all. + kRootParameter_Bindless_SamplerHeap, // +1 = 12 in all. // Unbounded SRV/UAV descriptor table - never changed. - kRootParameter_Bindless_ViewHeap, // +1 = 12 in all. + kRootParameter_Bindless_ViewHeap, // +1 = 13 in all. kRootParameter_Bindless_Count, }; @@ -362,7 +375,8 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t normalized_color_mask); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature); + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav); // Returns dword count for one element for a memexport format, or 0 if it's // not supported by the D3D12 command processor (if it's smaller that 1 dword, @@ -622,6 +636,13 @@ class D3D12CommandProcessor : public CommandProcessor { ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_; ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_; + // Whether the latest shared memory and EDRAM buffer binding contains the + // shared memory UAV rather than the SRV. + // Separate descriptor tables for the SRV and the UAV, even though only one is + // accessed dynamically in the shaders, are used to prevent a validation + // message about missing resource states in PIX. + std::optional current_shared_memory_binding_is_uav_; + // Pages with the descriptors currently used for handling Xenos draw calls. uint64_t draw_view_bindful_heap_index_; uint64_t draw_sampler_bindful_heap_index_; @@ -654,7 +675,8 @@ class D3D12CommandProcessor : public CommandProcessor { std::vector current_sampler_bindless_indices_pixel_; // Latest bindful descriptor handles used for handling Xenos draw calls. - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_srv_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_uav_and_edram_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_vertex_;