diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 3f1e07b70..f0be8c50e 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -950,6 +950,10 @@ bool D3D12CommandProcessor::SetupContext() { if (bindless_resources_used_) { // Global bindless resource root signatures. + // No CBV or UAV descriptor ranges with any descriptors to be allocated + // dynamically (via RequestPersistentViewBindlessDescriptor or + // RequestOneUseSingleViewDescriptors) should be here, because they would + // overlap the unbounded SRV range, which is not allowed on Nvidia Fermi! D3D12_ROOT_SIGNATURE_DESC root_signature_bindless_desc; D3D12_ROOT_PARAMETER root_parameters_bindless[kRootParameter_Bindless_Count]; @@ -1056,45 +1060,6 @@ bool D3D12CommandProcessor::SetupContext() { parameter.DescriptorTable.NumDescriptorRanges = 0; parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // 2D array textures. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = UINT_MAX; - range.BaseShaderRegister = 0; - range.RegisterSpace = - UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray); - range.OffsetInDescriptorsFromTableStart = 0; - } - // 3D textures. 
- { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = UINT_MAX; - range.BaseShaderRegister = 0; - range.RegisterSpace = - UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D); - range.OffsetInDescriptorsFromTableStart = 0; - } - // Cube textures. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = UINT_MAX; - range.BaseShaderRegister = 0; - range.RegisterSpace = - UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube); - range.OffsetInDescriptorsFromTableStart = 0; - } // Shared memory SRV. { assert_true(parameter.DescriptorTable.NumDescriptorRanges < @@ -1137,6 +1102,51 @@ bool D3D12CommandProcessor::SetupContext() { range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kEdramR32UintUAV); } + // Used UAV and SRV ranges must not overlap on Nvidia Fermi, so textures + // have OffsetInDescriptorsFromTableStart after all static descriptors of + // other types. + // 2D array textures. + { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray); + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kUnboundedSRVsStart); + } + // 3D textures. 
+ { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D); + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kUnboundedSRVsStart); + } + // Cube textures. + { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube); + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kUnboundedSRVsStart); + } } root_signature_bindless_vs_ = ui::d3d12::util::CreateRootSignature( provider, root_signature_bindless_desc); @@ -3938,7 +3948,8 @@ bool D3D12CommandProcessor::UpdateBindings( for (uint32_t i = 0; i < texture_count_vertex; ++i) { const D3D12Shader::TextureBinding& texture = textures_vertex[i]; descriptor_indices[texture.bindless_descriptor_index] = - texture_cache_->GetActiveTextureBindlessSRVIndex(texture); + texture_cache_->GetActiveTextureBindlessSRVIndex(texture) - + uint32_t(SystemBindlessView::kUnboundedSRVsStart); } current_texture_layout_uid_vertex_ = texture_layout_uid_vertex; if (texture_count_vertex) { @@ -3973,7 +3984,8 @@ bool D3D12CommandProcessor::UpdateBindings( for (uint32_t i = 0; i < texture_count_pixel; ++i) { const D3D12Shader::TextureBinding& texture = textures_pixel[i]; descriptor_indices[texture.bindless_descriptor_index] = - texture_cache_->GetActiveTextureBindlessSRVIndex(texture); + 
texture_cache_->GetActiveTextureBindlessSRVIndex(texture) - + uint32_t(SystemBindlessView::kUnboundedSRVsStart); } current_texture_layout_uid_pixel_ = texture_layout_uid_pixel; if (texture_count_pixel) { diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 3bab0d6d5..0b5a80e68 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -114,20 +114,23 @@ class D3D12CommandProcessor : public CommandProcessor { assert_true(bindless_resources_used_); return view_bindless_heap_gpu_start_; } - // Returns UINT32_MAX if no free descriptors. + // Returns UINT32_MAX if no free descriptors. If the unbounded SRV range for + // bindless resources is also used in the root signature of the draw / + // dispatch referencing this descriptor, this must only be used to allocate + // SRVs, otherwise it won't work on Nvidia Fermi (root signature creation will + // fail)! uint32_t RequestPersistentViewBindlessDescriptor(); void ReleaseViewBindlessDescriptorImmediately(uint32_t descriptor_index); - // Request non-contiguous SRV/UAV descriptors for use only within the next + // Request non-contiguous CBV/SRV/UAV descriptors for use only within the next // draw or dispatch command done for internal purposes. May change the current - // descriptor heap. + // descriptor heap. If the unbounded SRV range for bindless resources is also + // used in the root signature of the draw / dispatch referencing these + // descriptors, this must only be used to allocate SRVs, otherwise it won't + // work on Nvidia Fermi (root signature creation will fail)! bool RequestOneUseSingleViewDescriptors( uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out); // These are needed often, so they are always allocated. 
enum class SystemBindlessView : uint32_t { - kNullTexture2DArray, - kNullTexture3D, - kNullTextureCube, - kSharedMemoryRawSRV, kSharedMemoryR32UintSRV, kSharedMemoryR32G32UintSRV, @@ -148,6 +151,14 @@ class D3D12CommandProcessor : public CommandProcessor { kGammaRampNormalSRV, kGammaRampPWLSRV, + // Beyond this point, SRVs are accessible to shaders through an unbounded + // range, so no descriptors of other types bound to shaders alongside + // unbounded ranges may be located beyond this point. + kUnboundedSRVsStart, + kNullTexture2DArray = kUnboundedSRVsStart, + kNullTexture3D, + kNullTextureCube, + kCount, }; ui::d3d12::util::DescriptorCPUGPUHandlePair GetSystemBindlessViewHandlePair(