diff --git a/src/xenia/base/hash.h b/src/xenia/base/hash.h new file mode 100644 index 000000000..b4f252eb4 --- /dev/null +++ b/src/xenia/base/hash.h @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_BASE_HASH_H_ +#define XENIA_BASE_HASH_H_ + +#include + +namespace xe { +namespace hash { + +// For use in unordered_sets and unordered_maps (primarily multisets and +// multimaps, with manual collision resolution), where the hash is calculated +// externally (for instance, as XXH64), possibly requiring context data rather +// than a pure function to calculate the hash +template +struct IdentityHasher { + size_t operator()(const Key& key) const { return static_cast(key); } +}; + +} // namespace hash +} // namespace xe + +#endif // XENIA_BASE_HASH_H_ diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 787c8d971..edf94cba1 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -11,6 +11,7 @@ #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/cvar.h" @@ -24,6 +25,10 @@ #include "xenia/gpu/xenos.h" #include "xenia/ui/d3d12/d3d12_util.h" +DEFINE_bool(d3d12_bindless_resources, false, + "Use bindless resources where available - may improve performance, " + "but may make debugging more complicated.", + "D3D12"); DEFINE_bool(d3d12_edram_rov, true, "Use rasterizer-ordered views for render target emulation where " "available.", @@ -63,7 +68,10 @@ namespace d3d12 { constexpr uint32_t D3D12CommandProcessor::kQueueFrames; 
constexpr uint32_t - D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable; + D3D12CommandProcessor::RootBindfulExtraParameterIndices::kUnavailable; +constexpr uint32_t D3D12CommandProcessor::kViewBindfulHeapSize; +constexpr uint32_t D3D12CommandProcessor::kViewBindlessHeapSize; +constexpr uint32_t D3D12CommandProcessor::kSamplerHeapSize; constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth; constexpr uint32_t D3D12CommandProcessor::kSwapTextureHeight; constexpr uint32_t D3D12CommandProcessor::kScratchBufferSizeIncrement; @@ -167,6 +175,14 @@ void D3D12CommandProcessor::SubmitBarriers() { ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { assert_true(vertex_shader->is_translated()); + + if (bindless_resources_used_) { + return vertex_shader->host_vertex_shader_type() != + Shader::HostVertexShaderType::kVertex + ? root_signature_bindless_ds_ + : root_signature_bindless_vs_; + } + assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); D3D12_SHADER_VISIBILITY vertex_visibility; @@ -178,11 +194,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( } uint32_t texture_count_vertex, sampler_count_vertex; - vertex_shader->GetTextureSRVs(texture_count_vertex); + vertex_shader->GetTextureBindings(texture_count_vertex); vertex_shader->GetSamplerBindings(sampler_count_vertex); uint32_t texture_count_pixel = 0, sampler_count_pixel = 0; if (pixel_shader != nullptr) { - pixel_shader->GetTextureSRVs(texture_count_pixel); + pixel_shader->GetTextureBindings(texture_count_pixel); pixel_shader->GetSamplerBindings(sampler_count_pixel); } @@ -191,11 +207,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( uint32_t index = 0; uint32_t index_offset = 0; index |= texture_count_pixel << index_offset; - index_offset += D3D12Shader::kMaxTextureSRVIndexBits; + index_offset += D3D12Shader::kMaxTextureBindingIndexBits; index |= sampler_count_pixel << 
index_offset; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; index |= texture_count_vertex << index_offset; - index_offset += D3D12Shader::kMaxTextureSRVIndexBits; + index_offset += D3D12Shader::kMaxTextureBindingIndexBits; index |= sampler_count_vertex << index_offset; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN) @@ -204,16 +220,15 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( assert_true(index_offset <= 32); // Try an existing root signature. - auto it = root_signatures_.find(index); - if (it != root_signatures_.end()) { + auto it = root_signatures_bindful_.find(index); + if (it != root_signatures_bindful_.end()) { return it->second; } // Create a new one. D3D12_ROOT_SIGNATURE_DESC desc; - D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_Max]; - D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_Max]; - desc.NumParameters = kRootParameter_Count_Base; + D3D12_ROOT_PARAMETER parameters[kRootParameter_Bindful_Count_Max]; + desc.NumParameters = kRootParameter_Bindful_Count_Base; desc.pParameters = parameters; desc.NumStaticSamplers = 0; desc.pStaticSamplers = nullptr; @@ -223,88 +238,58 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( // Fetch constants. 
{ - auto& parameter = parameters[kRootParameter_FetchConstants]; - auto& range = ranges[kRootParameter_FetchConstants]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 1; - range.BaseShaderRegister = + auto& parameter = parameters[kRootParameter_Bindful_FetchConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Vertex float constants. { - auto& parameter = parameters[kRootParameter_FloatConstantsVertex]; - auto& range = ranges[kRootParameter_FloatConstantsVertex]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = vertex_visibility; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 1; - range.BaseShaderRegister = + auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsVertex]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = vertex_visibility; } // Pixel float constants. 
{ - auto& parameter = parameters[kRootParameter_FloatConstantsPixel]; - auto& range = ranges[kRootParameter_FloatConstantsPixel]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 1; - range.BaseShaderRegister = + auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsPixel]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; } // System constants. { - auto& parameter = parameters[kRootParameter_SystemConstants]; - auto& range = ranges[kRootParameter_SystemConstants]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 1; - range.BaseShaderRegister = + auto& parameter = parameters[kRootParameter_Bindful_SystemConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Bool and loop constants. 
{ - auto& parameter = parameters[kRootParameter_BoolLoopConstants]; - auto& range = ranges[kRootParameter_BoolLoopConstants]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - range.NumDescriptors = 1; - range.BaseShaderRegister = + auto& parameter = parameters[kRootParameter_Bindful_BoolLoopConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Shared memory and, if ROVs are used, EDRAM. D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3]; { - auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM]; + auto& parameter = parameters[kRootParameter_Bindful_SharedMemoryAndEDRAM]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 2; parameter.DescriptorTable.pDescriptorRanges = @@ -340,68 +325,70 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( // Extra parameters. // Pixel textures. 
+ D3D12_DESCRIPTOR_RANGE range_textures_pixel; if (texture_count_pixel > 0) { auto& parameter = parameters[desc.NumParameters]; - auto& range = ranges[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.DescriptorTable.pDescriptorRanges = &range_textures_pixel; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = texture_count_pixel; - range.BaseShaderRegister = - uint32_t(DxbcShaderTranslator::SRVMainRegister::kBoundTexturesStart); - range.RegisterSpace = uint32_t(DxbcShaderTranslator::SRVSpace::kMain); - range.OffsetInDescriptorsFromTableStart = 0; + range_textures_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range_textures_pixel.NumDescriptors = texture_count_pixel; + range_textures_pixel.BaseShaderRegister = + uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart); + range_textures_pixel.RegisterSpace = + uint32_t(DxbcShaderTranslator::SRVSpace::kMain); + range_textures_pixel.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Pixel samplers. 
+ D3D12_DESCRIPTOR_RANGE range_samplers_pixel; if (sampler_count_pixel > 0) { auto& parameter = parameters[desc.NumParameters]; - auto& range = ranges[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.DescriptorTable.pDescriptorRanges = &range_samplers_pixel; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.NumDescriptors = sampler_count_pixel; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + range_samplers_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range_samplers_pixel.NumDescriptors = sampler_count_pixel; + range_samplers_pixel.BaseShaderRegister = 0; + range_samplers_pixel.RegisterSpace = 0; + range_samplers_pixel.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Vertex textures. 
+ D3D12_DESCRIPTOR_RANGE range_textures_vertex; if (texture_count_vertex > 0) { auto& parameter = parameters[desc.NumParameters]; - auto& range = ranges[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.DescriptorTable.pDescriptorRanges = &range_textures_vertex; parameter.ShaderVisibility = vertex_visibility; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = texture_count_vertex; - range.BaseShaderRegister = - uint32_t(DxbcShaderTranslator::SRVMainRegister::kBoundTexturesStart); - range.RegisterSpace = uint32_t(DxbcShaderTranslator::SRVSpace::kMain); - range.OffsetInDescriptorsFromTableStart = 0; + range_textures_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range_textures_vertex.NumDescriptors = texture_count_vertex; + range_textures_vertex.BaseShaderRegister = + uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart); + range_textures_vertex.RegisterSpace = + uint32_t(DxbcShaderTranslator::SRVSpace::kMain); + range_textures_vertex.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Vertex samplers. 
+ D3D12_DESCRIPTOR_RANGE range_samplers_vertex; if (sampler_count_vertex > 0) { auto& parameter = parameters[desc.NumParameters]; - auto& range = ranges[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.DescriptorTable.pDescriptorRanges = &range_samplers_vertex; parameter.ShaderVisibility = vertex_visibility; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.NumDescriptors = sampler_count_vertex; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + range_samplers_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range_samplers_vertex.NumDescriptors = sampler_count_vertex; + range_samplers_vertex.BaseShaderRegister = 0; + range_samplers_vertex.RegisterSpace = 0; + range_samplers_vertex.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } @@ -415,97 +402,185 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( sampler_count_vertex); return nullptr; } - root_signatures_.insert({index, root_signature}); + root_signatures_bindful_.insert({index, root_signature}); return root_signature; } -uint32_t D3D12CommandProcessor::GetRootExtraParameterIndices( +uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - RootExtraParameterIndices& indices_out) { + RootBindfulExtraParameterIndices& indices_out) { uint32_t texture_count_pixel = 0, sampler_count_pixel = 0; if (pixel_shader != nullptr) { - pixel_shader->GetTextureSRVs(texture_count_pixel); + pixel_shader->GetTextureBindings(texture_count_pixel); pixel_shader->GetSamplerBindings(sampler_count_pixel); } uint32_t texture_count_vertex, sampler_count_vertex; - vertex_shader->GetTextureSRVs(texture_count_vertex); + vertex_shader->GetTextureBindings(texture_count_vertex); 
vertex_shader->GetSamplerBindings(sampler_count_vertex); - uint32_t index = kRootParameter_Count_Base; + uint32_t index = kRootParameter_Bindful_Count_Base; if (texture_count_pixel != 0) { indices_out.textures_pixel = index++; } else { - indices_out.textures_pixel = RootExtraParameterIndices::kUnavailable; + indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable; } if (sampler_count_pixel != 0) { indices_out.samplers_pixel = index++; } else { - indices_out.samplers_pixel = RootExtraParameterIndices::kUnavailable; + indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable; } if (texture_count_vertex != 0) { indices_out.textures_vertex = index++; } else { - indices_out.textures_vertex = RootExtraParameterIndices::kUnavailable; + indices_out.textures_vertex = + RootBindfulExtraParameterIndices::kUnavailable; } if (sampler_count_vertex != 0) { indices_out.samplers_vertex = index++; } else { - indices_out.samplers_vertex = RootExtraParameterIndices::kUnavailable; + indices_out.samplers_vertex = + RootBindfulExtraParameterIndices::kUnavailable; } return index; } -uint64_t D3D12CommandProcessor::RequestViewDescriptors( +uint64_t D3D12CommandProcessor::RequestViewBindfulDescriptors( uint64_t previous_heap_index, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { + assert_false(bindless_resources_used_); + assert_true(submission_open_); uint32_t descriptor_index; - uint64_t current_heap_index = view_heap_pool_->Request( + uint64_t current_heap_index = view_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. 
return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; } - ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap(); - if (current_view_heap_ != heap) { - current_view_heap_ = heap; - deferred_command_list_->SetDescriptorHeaps(current_view_heap_, - current_sampler_heap_); + ID3D12DescriptorHeap* heap = view_bindful_heap_pool_->GetLastRequestHeap(); + if (view_bindful_heap_current_ != heap) { + view_bindful_heap_current_ = heap; + deferred_command_list_->SetDescriptorHeaps(view_bindful_heap_current_, + sampler_bindful_heap_current_); } auto provider = GetD3D12Context()->GetD3D12Provider(); cpu_handle_out = provider->OffsetViewDescriptor( - view_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index); + view_bindful_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index); gpu_handle_out = provider->OffsetViewDescriptor( - view_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index); + view_bindful_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index); return current_heap_index; } -uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( +uint32_t D3D12CommandProcessor::RequestPersistentViewBindlessDescriptor() { + assert_true(bindless_resources_used_); + if (!view_bindless_heap_free_.empty()) { + uint32_t descriptor_index = view_bindless_heap_free_.back(); + view_bindless_heap_free_.pop_back(); + return descriptor_index; + } + if (view_bindless_heap_allocated_ >= kViewBindlessHeapSize) { + return UINT32_MAX; + } + return view_bindless_heap_allocated_++; +} + +void D3D12CommandProcessor::ReleaseViewBindlessDescriptorImmediately( + uint32_t descriptor_index) { + assert_true(bindless_resources_used_); + view_bindless_heap_free_.push_back(descriptor_index); +} + +bool D3D12CommandProcessor::RequestOneUseSingleViewDescriptors( + uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out) { + assert_true(submission_open_); + if (!count) { + return true; + } + assert_not_null(handles_out); + auto provider = 
GetD3D12Context()->GetD3D12Provider(); + if (bindless_resources_used_) { + // Request separate bindless descriptors that will be freed when this + // submission is completed by the GPU. + if (count > kViewBindlessHeapSize - view_bindless_heap_allocated_ + + view_bindless_heap_free_.size()) { + return false; + } + for (uint32_t i = 0; i < count; ++i) { + uint32_t descriptor_index; + if (!view_bindless_heap_free_.empty()) { + descriptor_index = view_bindless_heap_free_.back(); + view_bindless_heap_free_.pop_back(); + } else { + descriptor_index = view_bindless_heap_allocated_++; + } + view_bindless_one_use_descriptors_.push_back( + std::make_pair(descriptor_index, submission_current_)); + handles_out[i] = + std::make_pair(provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, descriptor_index), + provider->OffsetViewDescriptor( + view_bindless_heap_gpu_start_, descriptor_index)); + } + } else { + // Request a range within the current heap for bindful resources path. + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle_start; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_start; + if (RequestViewBindfulDescriptors( + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, count, count, + cpu_handle_start, gpu_handle_start) == + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + return false; + } + for (uint32_t i = 0; i < count; ++i) { + handles_out[i] = + std::make_pair(provider->OffsetViewDescriptor(cpu_handle_start, i), + provider->OffsetViewDescriptor(gpu_handle_start, i)); + } + } + return true; +} + +ui::d3d12::util::DescriptorCPUGPUHandlePair +D3D12CommandProcessor::GetSystemBindlessViewHandlePair( + SystemBindlessView view) const { + assert_true(bindless_resources_used_); + auto provider = GetD3D12Context()->GetD3D12Provider(); + return std::make_pair(provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, uint32_t(view)), + provider->OffsetViewDescriptor( + view_bindless_heap_gpu_start_, uint32_t(view))); +} + +uint64_t 
D3D12CommandProcessor::RequestSamplerBindfulDescriptors( uint64_t previous_heap_index, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { + assert_false(bindless_resources_used_); + assert_true(submission_open_); uint32_t descriptor_index; - uint64_t current_heap_index = sampler_heap_pool_->Request( + uint64_t current_heap_index = sampler_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; } - ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap(); - if (current_sampler_heap_ != heap) { - current_sampler_heap_ = heap; - deferred_command_list_->SetDescriptorHeaps(current_view_heap_, - current_sampler_heap_); + ID3D12DescriptorHeap* heap = sampler_bindful_heap_pool_->GetLastRequestHeap(); + if (sampler_bindful_heap_current_ != heap) { + sampler_bindful_heap_current_ = heap; + deferred_command_list_->SetDescriptorHeaps(view_bindful_heap_current_, + sampler_bindful_heap_current_); } uint32_t descriptor_offset = descriptor_index * GetD3D12Context()->GetD3D12Provider()->GetSamplerDescriptorSize(); cpu_handle_out.ptr = - sampler_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; + sampler_bindful_heap_pool_->GetLastRequestHeapCPUStart().ptr + + descriptor_offset; gpu_handle_out.ptr = - sampler_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; + sampler_bindful_heap_pool_->GetLastRequestHeapGPUStart().ptr + + descriptor_offset; return current_heap_index; } @@ -538,10 +613,8 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( return nullptr; } if (scratch_buffer_ != nullptr) { - BufferForDeletion buffer_for_deletion; - buffer_for_deletion.buffer = scratch_buffer_; - 
buffer_for_deletion.last_usage_submission = submission_current_; - buffers_for_deletion_.push_back(buffer_for_deletion); + buffers_for_deletion_.push_back( + std::make_pair(scratch_buffer_, submission_current_)); } scratch_buffer_ = buffer; scratch_buffer_size_ = size; @@ -573,7 +646,7 @@ void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) { command_list_1_) { auto provider = GetD3D12Context()->GetD3D12Provider(); auto tier = provider->GetProgrammableSamplePositionsTier(); - if (tier >= 2) { + if (tier >= D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_2) { // Depth buffer transitions are affected by sample positions. SubmitBarriers(); // Standard sample positions in Direct3D 10.1, but adjusted to take the @@ -660,6 +733,18 @@ void D3D12CommandProcessor::SetExternalGraphicsPipeline( } } +void D3D12CommandProcessor::NotifyShaderBindingsLayoutUIDsInvalidated() { + if (bindless_resources_used_) { + cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + } else { + bindful_textures_written_vertex_ = false; + bindful_textures_written_pixel_ = false; + bindful_samplers_written_vertex_ = false; + bindful_samplers_written_pixel_ = false; + } +} + std::string D3D12CommandProcessor::GetWindowTitleText() const { if (render_target_cache_) { if (!edram_rov_used_) { @@ -780,19 +865,268 @@ bool D3D12CommandProcessor::SetupContext() { command_list_->QueryInterface(IID_PPV_ARGS(&command_list_1_)); deferred_command_list_ = std::make_unique(this); + bindless_resources_used_ = + cvars::d3d12_bindless_resources && + provider->GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2; + edram_rov_used_ = + cvars::d3d12_edram_rov && provider->AreRasterizerOrderedViewsSupported(); + + // Initialize resource binding. 
constant_buffer_pool_ = std::make_unique(device, 1024 * 1024); - view_heap_pool_ = std::make_unique( - device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 32768); - // Direct3D 12 only allows shader-visible heaps with no more than 2048 - // samplers (due to Nvidia addressing). However, there's also possibly a weird - // bug in the Nvidia driver (tested on 440.97 and earlier on Windows 10 1803) - // that caused the sampler with index 2047 not to work if a heap with 8 or - // less samplers also exists - in case of Xenia, it's the immediate drawer's - // sampler heap. - // FIXME(Triang3l): Investigate the issue with the sampler 2047 on Nvidia. - sampler_heap_pool_ = std::make_unique( - device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2000); + if (bindless_resources_used_) { + D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; + view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + view_bindless_heap_desc.NumDescriptors = kViewBindlessHeapSize; + view_bindless_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + view_bindless_heap_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &view_bindless_heap_desc, IID_PPV_ARGS(&view_bindless_heap_)))) { + XELOGE("Failed to create the bindless CBV/SRV/UAV descriptor heap"); + return false; + } + view_bindless_heap_cpu_start_ = + view_bindless_heap_->GetCPUDescriptorHandleForHeapStart(); + view_bindless_heap_gpu_start_ = + view_bindless_heap_->GetGPUDescriptorHandleForHeapStart(); + view_bindless_heap_allocated_ = uint32_t(SystemBindlessView::kCount); + + D3D12_DESCRIPTOR_HEAP_DESC sampler_bindless_heap_desc; + sampler_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + sampler_bindless_heap_desc.NumDescriptors = kSamplerHeapSize; + sampler_bindless_heap_desc.Flags = + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + sampler_bindless_heap_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &sampler_bindless_heap_desc, + IID_PPV_ARGS(&sampler_bindless_heap_current_)))) { + 
XELOGE("Failed to create the bindless sampler descriptor heap"); + return false; + } + sampler_bindless_heap_cpu_start_ = + sampler_bindless_heap_current_->GetCPUDescriptorHandleForHeapStart(); + sampler_bindless_heap_gpu_start_ = + sampler_bindless_heap_current_->GetGPUDescriptorHandleForHeapStart(); + sampler_bindless_heap_allocated_ = 0; + } else { + view_bindful_heap_pool_ = std::make_unique( + device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, kViewBindfulHeapSize); + sampler_bindful_heap_pool_ = + std::make_unique( + device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, kSamplerHeapSize); + } + + if (bindless_resources_used_) { + // Global bindless resource root signatures. + D3D12_ROOT_SIGNATURE_DESC root_signature_bindless_desc; + D3D12_ROOT_PARAMETER + root_parameters_bindless[kRootParameter_Bindless_Count]; + root_signature_bindless_desc.NumParameters = kRootParameter_Bindless_Count; + root_signature_bindless_desc.pParameters = root_parameters_bindless; + root_signature_bindless_desc.NumStaticSamplers = 0; + root_signature_bindless_desc.pStaticSamplers = nullptr; + root_signature_bindless_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + // Fetch constants. + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_FetchConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + } + // Vertex float constants. + { + auto& parameter = root_parameters_bindless + [kRootParameter_Bindless_FloatConstantsVertex]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + } + // Pixel float constants. 
+ { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_FloatConstantsPixel]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + } + // Pixel shader descriptor indices. + { + auto& parameter = root_parameters_bindless + [kRootParameter_Bindless_DescriptorIndicesPixel]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + } + // Vertex shader descriptor indices. + { + auto& parameter = root_parameters_bindless + [kRootParameter_Bindless_DescriptorIndicesVertex]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + } + // System constants. + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_SystemConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + } + // Bool and loop constants. 
+ { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_BoolLoopConstants]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + parameter.Descriptor.ShaderRegister = + uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants); + parameter.Descriptor.RegisterSpace = 0; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + } + // Sampler heap. + D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range; + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_SamplerHeap]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + // Will be appending. + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = + &root_bindless_sampler_range; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_bindless_sampler_range.RangeType = + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + root_bindless_sampler_range.NumDescriptors = UINT_MAX; + root_bindless_sampler_range.BaseShaderRegister = 0; + root_bindless_sampler_range.RegisterSpace = 0; + root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0; + } + // View heap. + D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6]; + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_ViewHeap]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + // Will be appending. + parameter.DescriptorTable.NumDescriptorRanges = 0; + parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + // 2D array textures. 
+ { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray); + range.OffsetInDescriptorsFromTableStart = 0; + } + // 3D textures. + { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D); + range.OffsetInDescriptorsFromTableStart = 0; + } + // Cube textures. + { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = UINT_MAX; + range.BaseShaderRegister = 0; + range.RegisterSpace = + UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube); + range.OffsetInDescriptorsFromTableStart = 0; + } + // Shared memory SRV. 
+ { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); + range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kSharedMemoryRawSRV); + } + // Shared memory UAV. + { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kSharedMemoryRawUAV); + } + // EDRAM. 
+ if (edram_rov_used_) { + assert_true(parameter.DescriptorTable.NumDescriptorRanges < + xe::countof(root_bindless_view_ranges)); + auto& range = root_bindless_view_ranges[parameter.DescriptorTable + .NumDescriptorRanges++]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kEDRAM); + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = + UINT(SystemBindlessView::kEDRAMR32UintUAV); + } + } + root_signature_bindless_vs_ = ui::d3d12::util::CreateRootSignature( + provider, root_signature_bindless_desc); + if (!root_signature_bindless_vs_) { + XELOGE( + "Failed to create the global root signature for bindless resources, " + "the version for use without tessellation"); + return false; + } + root_parameters_bindless[kRootParameter_Bindless_FloatConstantsVertex] + .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN; + root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesVertex] + .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN; + root_signature_bindless_ds_ = ui::d3d12::util::CreateRootSignature( + provider, root_signature_bindless_desc); + if (!root_signature_bindless_ds_) { + XELOGE( + "Failed to create the global root signature for bindless resources, " + "the version for use with tessellation"); + return false; + } + } shared_memory_ = std::make_unique(this, memory_, &trace_writer_); @@ -801,25 +1135,23 @@ bool D3D12CommandProcessor::SetupContext() { return false; } - edram_rov_used_ = - cvars::d3d12_edram_rov && provider->AreRasterizerOrderedViewsSupported(); - - texture_cache_ = std::make_unique(this, register_file_, - shared_memory_.get()); + texture_cache_ = std::make_unique( + this, register_file_, bindless_resources_used_, shared_memory_.get()); if (!texture_cache_->Initialize(edram_rov_used_)) { XELOGE("Failed to initialize the texture cache"); return false; } render_target_cache_ = std::make_unique( - this, register_file_, 
&trace_writer_, edram_rov_used_); + this, register_file_, &trace_writer_, bindless_resources_used_, + edram_rov_used_); if (!render_target_cache_->Initialize(texture_cache_.get())) { XELOGE("Failed to initialize the render target cache"); return false; } pipeline_cache_ = std::make_unique( - this, register_file_, edram_rov_used_, + this, register_file_, bindless_resources_used_, edram_rov_used_, texture_cache_->IsResolutionScale2X() ? 2 : 1); if (!pipeline_cache_->Initialize()) { XELOGE("Failed to initialize the graphics pipeline state cache"); @@ -948,14 +1280,90 @@ bool D3D12CommandProcessor::SetupContext() { swap_texture_, &swap_srv_desc, swap_texture_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart()); + if (bindless_resources_used_) { + // Create the system bindless descriptors once all resources are + // initialized. + // kNullTexture2DArray. + D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc; + null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + null_srv_desc.Shader4ComponentMapping = + D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); + null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + null_srv_desc.Texture2DArray.MostDetailedMip = 0; + null_srv_desc.Texture2DArray.MipLevels = 1; + null_srv_desc.Texture2DArray.FirstArraySlice = 0; + null_srv_desc.Texture2DArray.ArraySize = 1; + null_srv_desc.Texture2DArray.PlaneSlice = 0; + null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + device->CreateShaderResourceView( + nullptr, &null_srv_desc, + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullTexture2DArray))); + // kNullTexture3D. 
+ null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + null_srv_desc.Texture3D.MostDetailedMip = 0; + null_srv_desc.Texture3D.MipLevels = 1; + null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f; + device->CreateShaderResourceView( + nullptr, &null_srv_desc, + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullTexture3D))); + // kNullTextureCube. + null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + null_srv_desc.TextureCube.MostDetailedMip = 0; + null_srv_desc.TextureCube.MipLevels = 1; + null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f; + device->CreateShaderResourceView( + nullptr, &null_srv_desc, + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullTextureCube))); + // kSharedMemoryRawSRV. + shared_memory_->WriteRawSRVDescriptor(provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kSharedMemoryRawSRV))); + // kSharedMemoryRawUAV. + shared_memory_->WriteRawUAVDescriptor(provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kSharedMemoryRawUAV))); + // kEDRAMR32UintUAV. + render_target_cache_->WriteEDRAMR32UintUAVDescriptor( + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kEDRAMR32UintUAV))); + // kEDRAMRawSRV. + render_target_cache_->WriteEDRAMRawSRVDescriptor( + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kEDRAMRawSRV))); + // kEDRAMRawUAV. + render_target_cache_->WriteEDRAMRawUAVDescriptor( + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kEDRAMRawUAV))); + // kGammaRampNormalSRV. + WriteGammaRampSRV(false, + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kGammaRampNormalSRV))); + // kGammaRampPWLSRV. 
+ WriteGammaRampSRV(true, + provider->OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kGammaRampPWLSRV))); + } + pix_capture_requested_.store(false, std::memory_order_relaxed); pix_capturing_ = false; // Just not to expose uninitialized memory. std::memset(&system_constants_, 0, sizeof(system_constants_)); - // Force writing of new format data. - std::memset(system_constants_color_formats_, 0xFF, - sizeof(system_constants_color_formats_)); return true; } @@ -970,7 +1378,7 @@ void D3D12CommandProcessor::ShutdownContext() { scratch_buffer_size_ = 0; for (auto& buffer_for_deletion : buffers_for_deletion_) { - buffer_for_deletion.buffer->Release(); + buffer_for_deletion.first->Release(); } buffers_for_deletion_.clear(); @@ -999,10 +1407,6 @@ void D3D12CommandProcessor::ShutdownContext() { ui::d3d12::util::ReleaseAndNull(gamma_ramp_upload_); ui::d3d12::util::ReleaseAndNull(gamma_ramp_texture_); - sampler_heap_pool_.reset(); - view_heap_pool_.reset(); - constant_buffer_pool_.reset(); - primitive_converter_.reset(); pipeline_cache_.reset(); @@ -1011,13 +1415,37 @@ void D3D12CommandProcessor::ShutdownContext() { texture_cache_.reset(); - // Root signatured are used by pipelines, thus freed after the pipelines. - for (auto it : root_signatures_) { + shared_memory_.reset(); + + // Shut down binding - bindless descriptors may be owned by subsystems like + // the texture cache. + + // Root signatures are used by pipeline states, thus freed after the pipeline + // states.
+ ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); + ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); + for (auto it : root_signatures_bindful_) { it.second->Release(); } - root_signatures_.clear(); + root_signatures_bindful_.clear(); - shared_memory_.reset(); + if (bindless_resources_used_) { + texture_cache_bindless_sampler_map_.clear(); + for (const auto& sampler_bindless_heap_overflowed : + sampler_bindless_heaps_overflowed_) { + sampler_bindless_heap_overflowed.first->Release(); + } + sampler_bindless_heaps_overflowed_.clear(); + sampler_bindless_heap_allocated_ = 0; + ui::d3d12::util::ReleaseAndNull(sampler_bindless_heap_current_); + view_bindless_one_use_descriptors_.clear(); + view_bindless_heap_free_.clear(); + ui::d3d12::util::ReleaseAndNull(view_bindless_heap_); + } else { + sampler_bindful_heap_pool_.reset(); + view_bindful_heap_pool_.reset(); + } + constant_buffer_pool_.reset(); deferred_command_list_.reset(); ui::d3d12::util::ReleaseAndNull(command_list_1_); @@ -1054,21 +1482,21 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { float_constant_index -= 256; if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - cbuffer_bindings_float_pixel_.up_to_date = false; + cbuffer_binding_float_pixel_.up_to_date = false; } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - cbuffer_bindings_float_vertex_.up_to_date = false; + cbuffer_binding_float_vertex_.up_to_date = false; } } } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { - cbuffer_bindings_bool_loop_.up_to_date = false; + cbuffer_binding_bool_loop_.up_to_date = false; } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { - cbuffer_bindings_fetch_.up_to_date = false; + cbuffer_binding_fetch_.up_to_date = false; if 
(texture_cache_ != nullptr) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); @@ -1094,8 +1522,7 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, // In case the swap command is the only one in the frame. BeginSubmission(true); - auto provider = GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); // Upload the new gamma ramps, using the upload buffer for the current frame // (will close the frame after this anyway, so can't write multiple times per @@ -1159,37 +1586,39 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, swap_texture_srv_desc, frontbuffer_format); if (swap_texture_resource) { render_target_cache_->FlushAndUnbindRenderTargets(); - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (RequestViewDescriptors(ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, - 2, 2, descriptor_cpu_start, - descriptor_gpu_start) != - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + + // This is according to D3D::InitializePresentationParameters from a game + // executable, which initializes the normal gamma ramp for 8_8_8_8 output + // and the PWL gamma ramp for 2_10_10_10. + bool use_pwl_gamma_ramp = + frontbuffer_format == TextureFormat::k_2_10_10_10 || + frontbuffer_format == TextureFormat::k_2_10_10_10_AS_16_16_16_16; + + bool descriptors_obtained; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_swap_texture; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_gamma_ramp; + if (bindless_resources_used_) { + descriptors_obtained = + RequestOneUseSingleViewDescriptors(1, &descriptor_swap_texture); + descriptor_gamma_ramp = GetSystemBindlessViewHandlePair( + use_pwl_gamma_ramp ? 
SystemBindlessView::kGammaRampPWLSRV + : SystemBindlessView::kGammaRampNormalSRV); + } else { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + descriptors_obtained = RequestOneUseSingleViewDescriptors(2, descriptors); + if (descriptors_obtained) { + descriptor_swap_texture = descriptors[0]; + descriptor_gamma_ramp = descriptors[1]; + WriteGammaRampSRV(use_pwl_gamma_ramp, descriptor_gamma_ramp.first); + } + } + if (descriptors_obtained) { // Must not call anything that can change the descriptor heap from now on! // Create the swap texture descriptor. - device->CreateShaderResourceView( - swap_texture_resource, &swap_texture_srv_desc, descriptor_cpu_start); - - // Create the gamma ramp texture descriptor. - // This is according to D3D::InitializePresentationParameters from a game - // executable, which initializes the normal gamma ramp for 8_8_8_8 output - // and the PWL gamma ramp for 2_10_10_10. - bool use_pwl_gamma_ramp = - frontbuffer_format == TextureFormat::k_2_10_10_10 || - frontbuffer_format == TextureFormat::k_2_10_10_10_AS_16_16_16_16; - D3D12_SHADER_RESOURCE_VIEW_DESC gamma_ramp_srv_desc; - gamma_ramp_srv_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; - gamma_ramp_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - gamma_ramp_srv_desc.Shader4ComponentMapping = - D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - gamma_ramp_srv_desc.Texture1D.MostDetailedMip = - use_pwl_gamma_ramp ? 1 : 0; - gamma_ramp_srv_desc.Texture1D.MipLevels = 1; - gamma_ramp_srv_desc.Texture1D.ResourceMinLODClamp = 0.0f; - device->CreateShaderResourceView( - gamma_ramp_texture_, &gamma_ramp_srv_desc, - provider->OffsetViewDescriptor(descriptor_cpu_start, 1)); + device->CreateShaderResourceView(swap_texture_resource, + &swap_texture_srv_desc, + descriptor_swap_texture.first); // The swap texture is kept as an SRV because the graphics system may draw // with it at any time. It's switched to RTV and back when needed. 
@@ -1222,10 +1651,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, deferred_command_list_->RSSetScissorRect(scissor); D3D12GraphicsSystem* graphics_system = static_cast<D3D12GraphicsSystem*>(graphics_system_); - D3D12_GPU_DESCRIPTOR_HANDLE gamma_ramp_gpu_handle = - provider->OffsetViewDescriptor(descriptor_gpu_start, 1); graphics_system->StretchTextureToFrontBuffer( - descriptor_gpu_start, &gamma_ramp_gpu_handle, + descriptor_swap_texture.second, &descriptor_gamma_ramp.second, use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f), *deferred_command_list_); // Ending the current frame anyway, so no need to reset the current render @@ -1879,17 +2306,25 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_allocator_submitted_last_ = nullptr; } - // Delete transient buffers marked for deletion. - auto erase_buffers_end = buffers_for_deletion_.begin(); - while (erase_buffers_end != buffers_for_deletion_.end()) { - if (erase_buffers_end->last_usage_submission > submission_completed_) { - ++erase_buffers_end; + // Release single-use bindless descriptors. + while (!view_bindless_one_use_descriptors_.empty()) { + if (view_bindless_one_use_descriptors_.front().second > + submission_completed_) { break; } - erase_buffers_end->buffer->Release(); - ++erase_buffers_end; + ReleaseViewBindlessDescriptorImmediately( + view_bindless_one_use_descriptors_.front().first); + view_bindless_one_use_descriptors_.pop_front(); + } + + // Delete transient buffers marked for deletion.
+ while (!buffers_for_deletion_.empty()) { + if (buffers_for_deletion_.front().second > submission_completed_) { + break; + } + buffers_for_deletion_.front().first->Release(); + buffers_for_deletion_.pop_front(); } - buffers_for_deletion_.erase(buffers_for_deletion_.begin(), erase_buffers_end); shared_memory_->CompletedSubmissionUpdated(); @@ -1949,8 +2384,13 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { current_external_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; - current_view_heap_ = nullptr; - current_sampler_heap_ = nullptr; + if (bindless_resources_used_) { + deferred_command_list_->SetDescriptorHeaps( + view_bindless_heap_, sampler_bindless_heap_current_); + } else { + view_bindful_heap_current_ = nullptr; + sampler_bindful_heap_current_ = nullptr; + } primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; render_target_cache_->BeginSubmission(); @@ -1966,23 +2406,32 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { sizeof(current_float_constant_map_vertex_)); std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); - cbuffer_bindings_system_.up_to_date = false; - cbuffer_bindings_float_vertex_.up_to_date = false; - cbuffer_bindings_float_pixel_.up_to_date = false; - cbuffer_bindings_bool_loop_.up_to_date = false; - cbuffer_bindings_fetch_.up_to_date = false; - draw_view_heap_index_ = ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; - draw_sampler_heap_index_ = ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; - texture_bindings_written_vertex_ = false; - texture_bindings_written_pixel_ = false; - samplers_written_vertex_ = false; - samplers_written_pixel_ = false; + cbuffer_binding_system_.up_to_date = false; + cbuffer_binding_float_vertex_.up_to_date = false; + cbuffer_binding_float_pixel_.up_to_date = false; + cbuffer_binding_bool_loop_.up_to_date = false; + cbuffer_binding_fetch_.up_to_date = false; + if 
(bindless_resources_used_) { + cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + } else { + draw_view_bindful_heap_index_ = + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + draw_sampler_bindful_heap_index_ = + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + bindful_textures_written_vertex_ = false; + bindful_textures_written_pixel_ = false; + bindful_samplers_written_vertex_ = false; + bindful_samplers_written_pixel_ = false; + } // Reclaim pool pages - no need to do this every small submission since some // may be reused. constant_buffer_pool_->Reclaim(frame_completed_); - view_heap_pool_->Reclaim(frame_completed_); - sampler_heap_pool_->Reclaim(frame_completed_); + if (!bindless_resources_used_) { + view_bindful_heap_pool_->Reclaim(frame_completed_); + sampler_bindful_heap_pool_->Reclaim(frame_completed_); + } pix_capturing_ = pix_capture_requested_.exchange(false, std::memory_order_relaxed); @@ -2090,8 +2539,18 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; - sampler_heap_pool_->ClearCache(); - view_heap_pool_->ClearCache(); + if (bindless_resources_used_) { + texture_cache_bindless_sampler_map_.clear(); + for (const auto& sampler_bindless_heap_overflowed : + sampler_bindless_heaps_overflowed_) { + sampler_bindless_heap_overflowed.first->Release(); + } + sampler_bindless_heaps_overflowed_.clear(); + sampler_bindless_heap_allocated_ = 0; + } else { + sampler_bindful_heap_pool_->ClearCache(); + view_bindful_heap_pool_->ClearCache(); + } constant_buffer_pool_->ClearCache(); primitive_converter_->ClearCache(); @@ -2102,10 +2561,10 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { texture_cache_->ClearCache(); - for (auto it : root_signatures_) { + for (auto it : root_signatures_bindful_) { it.second->Release(); } - root_signatures_.clear(); + root_signatures_bindful_.clear(); 
shared_memory_->ClearCache(); } @@ -2896,7 +3355,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; } - cbuffer_bindings_system_.up_to_date &= !dirty; + cbuffer_binding_system_.up_to_date &= !dirty; } bool D3D12CommandProcessor::UpdateBindings( @@ -2910,85 +3369,38 @@ bool D3D12CommandProcessor::UpdateBindings( SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - // Bind the new root signature. + // Set the new root signature. if (current_graphics_root_signature_ != root_signature) { current_graphics_root_signature_ = root_signature; - GetRootExtraParameterIndices(vertex_shader, pixel_shader, - current_graphics_root_extras_); - // We don't know which root parameters are up to date anymore. + if (!bindless_resources_used_) { + GetRootBindfulExtraParameterIndices( + vertex_shader, pixel_shader, current_graphics_root_bindful_extras_); + } + // Changing the root signature invalidates all bindings. current_graphics_root_up_to_date_ = 0; deferred_command_list_->D3DSetGraphicsRootSignature(root_signature); } - XXH64_state_t hash_state; + // Select the root parameter indices depending on the used binding model. + uint32_t root_parameter_fetch_constants = + bindless_resources_used_ ? kRootParameter_Bindless_FetchConstants + : kRootParameter_Bindful_FetchConstants; + uint32_t root_parameter_float_constants_vertex = + bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsVertex + : kRootParameter_Bindful_FloatConstantsVertex; + uint32_t root_parameter_float_constants_pixel = + bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsPixel + : kRootParameter_Bindful_FloatConstantsPixel; + uint32_t root_parameter_system_constants = + bindless_resources_used_ ? kRootParameter_Bindless_SystemConstants + : kRootParameter_Bindful_SystemConstants; + uint32_t root_parameter_bool_loop_constants = + bindless_resources_used_ ? 
kRootParameter_Bindless_BoolLoopConstants + : kRootParameter_Bindful_BoolLoopConstants; - // Get textures and samplers used by the vertex shader. - uint32_t texture_count_vertex, sampler_count_vertex; - const D3D12Shader::TextureSRV* textures_vertex = - vertex_shader->GetTextureSRVs(texture_count_vertex); - uint64_t texture_bindings_hash_vertex = - texture_count_vertex != 0 - ? texture_cache_->GetDescriptorHashForActiveTextures( - textures_vertex, texture_count_vertex) - : 0; - const D3D12Shader::SamplerBinding* samplers_vertex = - vertex_shader->GetSamplerBindings(sampler_count_vertex); - XXH64_reset(&hash_state, 0); - for (uint32_t i = 0; i < sampler_count_vertex; ++i) { - TextureCache::SamplerParameters sampler_parameters = - texture_cache_->GetSamplerParameters(samplers_vertex[i]); - XXH64_update(&hash_state, &sampler_parameters, sizeof(sampler_parameters)); - } - uint64_t samplers_hash_vertex = XXH64_digest(&hash_state); - - // Get textures and samplers used by the pixel shader. - uint32_t texture_count_pixel, sampler_count_pixel; - const D3D12Shader::TextureSRV* textures_pixel; - const D3D12Shader::SamplerBinding* samplers_pixel; - if (pixel_shader != nullptr) { - textures_pixel = pixel_shader->GetTextureSRVs(texture_count_pixel); - samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel); - } else { - textures_pixel = nullptr; - texture_count_pixel = 0; - samplers_pixel = nullptr; - sampler_count_pixel = 0; - } - uint64_t texture_bindings_hash_pixel = - texture_count_pixel != 0 - ? 
texture_cache_->GetDescriptorHashForActiveTextures( - textures_pixel, texture_count_pixel) - : 0; - XXH64_reset(&hash_state, 0); - for (uint32_t i = 0; i < sampler_count_pixel; ++i) { - TextureCache::SamplerParameters sampler_parameters = - texture_cache_->GetSamplerParameters(samplers_pixel[i]); - XXH64_update(&hash_state, &sampler_parameters, sizeof(sampler_parameters)); - } - uint64_t samplers_hash_pixel = XXH64_digest(&hash_state); - - // Begin updating descriptors. - bool write_system_constant_view = false; - bool write_float_constant_view_vertex = false; - bool write_float_constant_view_pixel = false; - bool write_bool_loop_constant_view = false; - bool write_fetch_constant_view = false; - bool write_textures_vertex = - texture_count_vertex != 0 && - (!texture_bindings_written_vertex_ || - current_texture_bindings_hash_vertex_ != texture_bindings_hash_vertex); - bool write_textures_pixel = - texture_count_pixel != 0 && - (!texture_bindings_written_pixel_ || - current_texture_bindings_hash_pixel_ != texture_bindings_hash_pixel); - bool write_samplers_vertex = - sampler_count_vertex != 0 && - (!samplers_written_vertex_ || - current_samplers_hash_vertex_ != samplers_hash_vertex); - bool write_samplers_pixel = - sampler_count_pixel != 0 && - (!samplers_written_pixel_ || - current_samplers_hash_pixel_ != samplers_hash_pixel); + // + // Update root constant buffers that are common for bindful and bindless. + // // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox @@ -2997,7 +3409,6 @@ bool D3D12CommandProcessor::UpdateBindings( regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - // Check if the float constant layout is still the same and get the counts. 
const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); @@ -3017,7 +3428,7 @@ bool D3D12CommandProcessor::UpdateBindings( // If no float constants at all, we can reuse any buffer for them, so not // invalidating. if (float_constant_map_vertex.float_count != 0) { - cbuffer_bindings_float_vertex_.up_to_date = false; + cbuffer_binding_float_vertex_.up_to_date = false; } } } @@ -3032,7 +3443,7 @@ bool D3D12CommandProcessor::UpdateBindings( current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; if (float_constant_map_pixel.float_count != 0) { - cbuffer_bindings_float_pixel_.up_to_date = false; + cbuffer_binding_float_pixel_.up_to_date = false; } } } @@ -3044,23 +3455,24 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)), 256u); - // Update constant buffers. - if (!cbuffer_bindings_system_.up_to_date) { + // Write the constant buffer data. + if (!cbuffer_binding_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->Request( frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u), - nullptr, nullptr, &cbuffer_bindings_system_.buffer_address); + nullptr, nullptr, &cbuffer_binding_system_.address); if (system_constants == nullptr) { return false; } std::memcpy(system_constants, &system_constants_, sizeof(system_constants_)); - cbuffer_bindings_system_.up_to_date = true; - write_system_constant_view = true; + cbuffer_binding_system_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_system_constants); } - if (!cbuffer_bindings_float_vertex_.up_to_date) { + if (!cbuffer_binding_float_vertex_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( frame_current_, float_constant_size_vertex, nullptr, nullptr, - &cbuffer_bindings_float_vertex_.buffer_address); + &cbuffer_binding_float_vertex_.address); if (float_constants == nullptr) { return false; } @@ 
-3079,13 +3491,14 @@ bool D3D12CommandProcessor::UpdateBindings( float_constants += 4 * sizeof(float); } } - cbuffer_bindings_float_vertex_.up_to_date = true; - write_float_constant_view_vertex = true; + cbuffer_binding_float_vertex_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_float_constants_vertex); } - if (!cbuffer_bindings_float_pixel_.up_to_date) { + if (!cbuffer_binding_float_pixel_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( frame_current_, float_constant_size_pixel, nullptr, nullptr, - &cbuffer_bindings_float_pixel_.buffer_address); + &cbuffer_binding_float_pixel_.address); if (float_constants == nullptr) { return false; } @@ -3108,331 +3521,628 @@ bool D3D12CommandProcessor::UpdateBindings( } } } - cbuffer_bindings_float_pixel_.up_to_date = true; - write_float_constant_view_pixel = true; + cbuffer_binding_float_pixel_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_float_constants_pixel); } - if (!cbuffer_bindings_bool_loop_.up_to_date) { - uint8_t* bool_loop_constants = constant_buffer_pool_->Request( - frame_current_, 256, nullptr, nullptr, - &cbuffer_bindings_bool_loop_.buffer_address); + if (!cbuffer_binding_bool_loop_.up_to_date) { + uint8_t* bool_loop_constants = + constant_buffer_pool_->Request(frame_current_, 256, nullptr, nullptr, + &cbuffer_binding_bool_loop_.address); if (bool_loop_constants == nullptr) { return false; } std::memcpy(bool_loop_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, (8 + 32) * sizeof(uint32_t)); - cbuffer_bindings_bool_loop_.up_to_date = true; - write_bool_loop_constant_view = true; + cbuffer_binding_bool_loop_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_bool_loop_constants); } - if (!cbuffer_bindings_fetch_.up_to_date) { - uint8_t* fetch_constants = - constant_buffer_pool_->Request(frame_current_, 768, nullptr, nullptr, - &cbuffer_bindings_fetch_.buffer_address); + if 
(!cbuffer_binding_fetch_.up_to_date) { + uint8_t* fetch_constants = constant_buffer_pool_->Request( + frame_current_, 768, nullptr, nullptr, &cbuffer_binding_fetch_.address); if (fetch_constants == nullptr) { return false; } std::memcpy(fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, 32 * 6 * sizeof(uint32_t)); - cbuffer_bindings_fetch_.up_to_date = true; - write_fetch_constant_view = true; + cbuffer_binding_fetch_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_fetch_constants); } - // Allocate the descriptors. - uint32_t view_count_partial_update = 0; - if (write_system_constant_view) { - ++view_count_partial_update; + // + // Update descriptors. + // + + // Get textures and samplers used by the vertex shader, check if the last used + // samplers are compatible and update them. + size_t texture_layout_uid_vertex = + vertex_shader->GetTextureBindingLayoutUserUID(); + size_t sampler_layout_uid_vertex = + vertex_shader->GetSamplerBindingLayoutUserUID(); + uint32_t texture_count_vertex, sampler_count_vertex; + const D3D12Shader::TextureBinding* textures_vertex = + vertex_shader->GetTextureBindings(texture_count_vertex); + const D3D12Shader::SamplerBinding* samplers_vertex = + vertex_shader->GetSamplerBindings(sampler_count_vertex); + if (sampler_count_vertex) { + if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) { + current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex; + cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + bindful_samplers_written_vertex_ = false; + } + current_samplers_vertex_.resize(std::max(current_samplers_vertex_.size(), + size_t(sampler_count_vertex))); + for (uint32_t i = 0; i < sampler_count_vertex; ++i) { + TextureCache::SamplerParameters parameters = + texture_cache_->GetSamplerParameters(samplers_vertex[i]); + if (current_samplers_vertex_[i] != parameters) { + cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + 
bindful_samplers_written_vertex_ = false; + current_samplers_vertex_[i] = parameters; + } + } } - if (write_float_constant_view_vertex) { - ++view_count_partial_update; + + // Get textures and samplers used by the pixel shader, check if the last used + // samplers are compatible and update them. + size_t texture_layout_uid_pixel, sampler_layout_uid_pixel; + uint32_t texture_count_pixel, sampler_count_pixel; + const D3D12Shader::TextureBinding* textures_pixel; + const D3D12Shader::SamplerBinding* samplers_pixel; + if (pixel_shader != nullptr) { + texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID(); + sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID(); + textures_pixel = pixel_shader->GetTextureBindings(texture_count_pixel); + samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel); + if (sampler_count_pixel) { + if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) { + current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel; + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + bindful_samplers_written_pixel_ = false; + } + current_samplers_pixel_.resize(std::max(current_samplers_pixel_.size(), + size_t(sampler_count_pixel))); + for (uint32_t i = 0; i < sampler_count_pixel; ++i) { + TextureCache::SamplerParameters parameters = + texture_cache_->GetSamplerParameters(samplers_pixel[i]); + if (current_samplers_pixel_[i] != parameters) { + current_samplers_pixel_[i] = parameters; + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + bindful_samplers_written_pixel_ = false; + } + } + } + } else { + texture_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty; + sampler_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty; + textures_pixel = nullptr; + texture_count_pixel = 0; + samplers_pixel = nullptr; + sampler_count_pixel = 0; } - if (write_float_constant_view_pixel) { - ++view_count_partial_update; - } - if (write_bool_loop_constant_view) { - ++view_count_partial_update; - 
} - if (write_fetch_constant_view) { - ++view_count_partial_update; - } - if (write_textures_vertex) { - view_count_partial_update += texture_count_vertex; - } - if (write_textures_pixel) { - view_count_partial_update += texture_count_pixel; - } - // All the constants + shared memory SRV and UAV + textures. - uint32_t view_count_full_update = - 7 + texture_count_vertex + texture_count_pixel; - if (edram_rov_used_) { - // + EDRAM UAV. - ++view_count_full_update; - } - D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; - D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; - uint32_t descriptor_size_view = provider->GetViewDescriptorSize(); - uint64_t view_heap_index = RequestViewDescriptors( - draw_view_heap_index_, view_count_partial_update, view_count_full_update, - view_cpu_handle, view_gpu_handle); - if (view_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - XELOGE("Failed to allocate view descriptors!"); - return false; - } - uint32_t sampler_count_partial_update = 0; - if (write_samplers_vertex) { - sampler_count_partial_update += sampler_count_vertex; - } - if (write_samplers_pixel) { - sampler_count_partial_update += sampler_count_pixel; - } - D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {}; - D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; - uint32_t descriptor_size_sampler = provider->GetSamplerDescriptorSize(); - uint64_t sampler_heap_index = - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; - if (sampler_count_vertex != 0 || sampler_count_pixel != 0) { - sampler_heap_index = RequestSamplerDescriptors( - draw_sampler_heap_index_, sampler_count_partial_update, - sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle, - sampler_gpu_handle); - if (sampler_heap_index == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - XELOGE("Failed to allocate sampler descriptors!"); + + assert_true(sampler_count_vertex + sampler_count_pixel <= kSamplerHeapSize); + + if (bindless_resources_used_) { + // + // Bindless descriptors path. 
+ // + + // Check if need to write new descriptor indices. + // Samplers have already been checked. + if (texture_count_vertex && + cbuffer_binding_descriptor_indices_vertex_.up_to_date && + (current_texture_layout_uid_vertex_ != texture_layout_uid_vertex || + !texture_cache_->AreActiveTextureSRVKeysUpToDate( + current_texture_srv_keys_vertex_.data(), textures_vertex, + texture_count_vertex))) { + cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + } + if (texture_count_pixel && + cbuffer_binding_descriptor_indices_pixel_.up_to_date && + (current_texture_layout_uid_pixel_ != texture_layout_uid_pixel || + !texture_cache_->AreActiveTextureSRVKeysUpToDate( + current_texture_srv_keys_pixel_.data(), textures_pixel, + texture_count_pixel))) { + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + } + + // Get sampler descriptor indices, write new samplers, and handle sampler + // heap overflow if it happens. + if ((sampler_count_vertex && + !cbuffer_binding_descriptor_indices_vertex_.up_to_date) || + (sampler_count_pixel && + !cbuffer_binding_descriptor_indices_pixel_.up_to_date)) { + for (uint32_t i = 0; i < 2; ++i) { + if (i) { + // Overflow happened - invalidate sampler bindings because their + // descriptor indices can't be used anymore (and even if heap creation + // fails, because current_sampler_bindless_indices_#_ are in an + // undefined state now) and switch to a new sampler heap. 
+ cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; + cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; + ID3D12DescriptorHeap* sampler_heap_new; + if (!sampler_bindless_heaps_overflowed_.empty() && + sampler_bindless_heaps_overflowed_.front().second <= + submission_completed_) { + sampler_heap_new = sampler_bindless_heaps_overflowed_.front().first; + sampler_bindless_heaps_overflowed_.pop_front(); + } else { + D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_new_desc; + sampler_heap_new_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + sampler_heap_new_desc.NumDescriptors = kSamplerHeapSize; + sampler_heap_new_desc.Flags = + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + sampler_heap_new_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &sampler_heap_new_desc, IID_PPV_ARGS(&sampler_heap_new)))) { + XELOGE( + "Failed to create a new bindless sampler descriptor heap " + "after an overflow of the previous one"); + return false; + } + } + // Only change the heap if a new heap was created successfully, not to + // leave the values in an undefined state in case CreateDescriptorHeap + // has failed. + sampler_bindless_heaps_overflowed_.push_back(std::make_pair( + sampler_bindless_heap_current_, submission_current_)); + sampler_bindless_heap_current_ = sampler_heap_new; + sampler_bindless_heap_cpu_start_ = + sampler_bindless_heap_current_ + ->GetCPUDescriptorHandleForHeapStart(); + sampler_bindless_heap_gpu_start_ = + sampler_bindless_heap_current_ + ->GetGPUDescriptorHandleForHeapStart(); + sampler_bindless_heap_allocated_ = 0; + // The only thing the heap is used for now is texture cache samplers - + // invalidate all of them. 
+ texture_cache_bindless_sampler_map_.clear(); + deferred_command_list_->SetDescriptorHeaps( + view_bindless_heap_, sampler_bindless_heap_current_); + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_Bindless_SamplerHeap); + } + bool samplers_overflowed = false; + if (sampler_count_vertex && + !cbuffer_binding_descriptor_indices_vertex_.up_to_date) { + current_sampler_bindless_indices_vertex_.resize( + std::max(current_sampler_bindless_indices_vertex_.size(), + size_t(sampler_count_vertex))); + for (uint32_t j = 0; j < sampler_count_vertex; ++j) { + TextureCache::SamplerParameters sampler_parameters = + current_samplers_vertex_[j]; + uint32_t sampler_index; + auto it = texture_cache_bindless_sampler_map_.find( + sampler_parameters.value); + if (it != texture_cache_bindless_sampler_map_.end()) { + sampler_index = it->second; + } else { + if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) { + samplers_overflowed = true; + break; + } + sampler_index = sampler_bindless_heap_allocated_++; + texture_cache_->WriteSampler( + sampler_parameters, + provider->OffsetViewDescriptor( + sampler_bindless_heap_cpu_start_, sampler_index)); + texture_cache_bindless_sampler_map_.insert( + {sampler_parameters.value, sampler_index}); + } + current_sampler_bindless_indices_vertex_[j] = sampler_index; + } + } + if (samplers_overflowed) { + continue; + } + if (sampler_count_pixel && + !cbuffer_binding_descriptor_indices_pixel_.up_to_date) { + current_sampler_bindless_indices_pixel_.resize( + std::max(current_sampler_bindless_indices_pixel_.size(), + size_t(sampler_count_pixel))); + for (uint32_t j = 0; j < sampler_count_pixel; ++j) { + TextureCache::SamplerParameters sampler_parameters = + current_samplers_pixel_[j]; + uint32_t sampler_index; + auto it = texture_cache_bindless_sampler_map_.find( + sampler_parameters.value); + if (it != texture_cache_bindless_sampler_map_.end()) { + sampler_index = it->second; + } else { + if (sampler_bindless_heap_allocated_ >= 
kSamplerHeapSize) { + samplers_overflowed = true; + break; + } + sampler_index = sampler_bindless_heap_allocated_++; + texture_cache_->WriteSampler( + sampler_parameters, + provider->OffsetViewDescriptor( + sampler_bindless_heap_cpu_start_, sampler_index)); + texture_cache_bindless_sampler_map_.insert( + {sampler_parameters.value, sampler_index}); + } + current_sampler_bindless_indices_pixel_[j] = sampler_index; + } + } + if (!samplers_overflowed) { + break; + } + } + } + + if (!cbuffer_binding_descriptor_indices_vertex_.up_to_date) { + uint32_t* descriptor_indices = + reinterpret_cast(constant_buffer_pool_->Request( + frame_current_, + xe::align( + uint32_t(std::max(texture_count_vertex + sampler_count_vertex, + uint32_t(1)) * + sizeof(uint32_t)), + uint32_t(256)), + nullptr, nullptr, + &cbuffer_binding_descriptor_indices_vertex_.address)); + if (!descriptor_indices) { + return false; + } + for (uint32_t i = 0; i < texture_count_vertex; ++i) { + const D3D12Shader::TextureBinding& texture = textures_vertex[i]; + descriptor_indices[texture.bindless_descriptor_index] = + texture_cache_->GetActiveTextureBindlessSRVIndex(texture); + } + current_texture_layout_uid_vertex_ = texture_layout_uid_vertex; + if (texture_count_vertex) { + current_texture_srv_keys_vertex_.resize( + std::max(current_texture_srv_keys_vertex_.size(), + size_t(texture_count_vertex))); + texture_cache_->WriteActiveTextureSRVKeys( + current_texture_srv_keys_vertex_.data(), textures_vertex, + texture_count_vertex); + } + // Current samplers have already been updated. 
+ for (uint32_t i = 0; i < sampler_count_vertex; ++i) { + descriptor_indices[samplers_vertex[i].bindless_descriptor_index] = + current_sampler_bindless_indices_vertex_[i]; + } + cbuffer_binding_descriptor_indices_vertex_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_Bindless_DescriptorIndicesVertex); + } + + if (!cbuffer_binding_descriptor_indices_pixel_.up_to_date) { + uint32_t* descriptor_indices = + reinterpret_cast(constant_buffer_pool_->Request( + frame_current_, + xe::align( + uint32_t(std::max(texture_count_pixel + sampler_count_pixel, + uint32_t(1)) * + sizeof(uint32_t)), + uint32_t(256)), + nullptr, nullptr, + &cbuffer_binding_descriptor_indices_pixel_.address)); + if (!descriptor_indices) { + return false; + } + for (uint32_t i = 0; i < texture_count_pixel; ++i) { + const D3D12Shader::TextureBinding& texture = textures_pixel[i]; + descriptor_indices[texture.bindless_descriptor_index] = + texture_cache_->GetActiveTextureBindlessSRVIndex(texture); + } + current_texture_layout_uid_pixel_ = texture_layout_uid_pixel; + if (texture_count_pixel) { + current_texture_srv_keys_pixel_.resize( + std::max(current_texture_srv_keys_pixel_.size(), + size_t(texture_count_pixel))); + texture_cache_->WriteActiveTextureSRVKeys( + current_texture_srv_keys_pixel_.data(), textures_pixel, + texture_count_pixel); + } + // Current samplers have already been updated. + for (uint32_t i = 0; i < sampler_count_pixel; ++i) { + descriptor_indices[samplers_pixel[i].bindless_descriptor_index] = + current_sampler_bindless_indices_pixel_[i]; + } + cbuffer_binding_descriptor_indices_pixel_.up_to_date = true; + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_Bindless_DescriptorIndicesPixel); + } + } else { + // + // Bindful descriptors path. + // + + // See what descriptors need to be updated. + // Samplers have already been checked. 
+ bool write_textures_vertex = + texture_count_vertex && + (!bindful_textures_written_vertex_ || + current_texture_layout_uid_vertex_ != texture_layout_uid_vertex || + !texture_cache_->AreActiveTextureSRVKeysUpToDate( + current_texture_srv_keys_vertex_.data(), textures_vertex, + texture_count_vertex)); + bool write_textures_pixel = + texture_count_pixel && + (!bindful_textures_written_pixel_ || + current_texture_layout_uid_pixel_ != texture_layout_uid_pixel || + !texture_cache_->AreActiveTextureSRVKeysUpToDate( + current_texture_srv_keys_pixel_.data(), textures_pixel, + texture_count_pixel)); + bool write_samplers_vertex = + sampler_count_vertex && !bindful_samplers_written_vertex_; + bool write_samplers_pixel = + sampler_count_pixel && !bindful_samplers_written_pixel_; + + // Allocate the descriptors. + uint32_t view_count_partial_update = 0; + if (write_textures_vertex) { + view_count_partial_update += texture_count_vertex; + } + if (write_textures_pixel) { + view_count_partial_update += texture_count_pixel; + } + // All the constants + shared memory SRV and UAV + textures. + uint32_t view_count_full_update = + 2 + texture_count_vertex + texture_count_pixel; + if (edram_rov_used_) { + // + EDRAM UAV. + ++view_count_full_update; + } + D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; + uint32_t descriptor_size_view = provider->GetViewDescriptorSize(); + uint64_t view_heap_index = RequestViewBindfulDescriptors( + draw_view_bindful_heap_index_, view_count_partial_update, + view_count_full_update, view_cpu_handle, view_gpu_handle); + if (view_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + XELOGE("Failed to allocate view descriptors"); return false; } - } - if (draw_view_heap_index_ != view_heap_index) { - // Need to update all view descriptors. 
- write_system_constant_view = true; - write_fetch_constant_view = true; - write_float_constant_view_vertex = true; - write_float_constant_view_pixel = true; - write_bool_loop_constant_view = true; - write_textures_vertex = texture_count_vertex != 0; - write_textures_pixel = texture_count_pixel != 0; - texture_bindings_written_vertex_ = false; - texture_bindings_written_pixel_ = false; - // If updating fully, write the shared memory SRV and UAV descriptors and, - // if needed, the EDRAM descriptor. - gpu_handle_shared_memory_and_edram_ = view_gpu_handle; - shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - if (edram_rov_used_) { - render_target_cache_->WriteEDRAMUint32UAVDescriptor(view_cpu_handle); + uint32_t sampler_count_partial_update = 0; + if (write_samplers_vertex) { + sampler_count_partial_update += sampler_count_vertex; + } + if (write_samplers_pixel) { + sampler_count_partial_update += sampler_count_pixel; + } + D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {}; + D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; + uint32_t descriptor_size_sampler = provider->GetSamplerDescriptorSize(); + uint64_t sampler_heap_index = + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + if (sampler_count_vertex != 0 || sampler_count_pixel != 0) { + sampler_heap_index = RequestSamplerBindfulDescriptors( + draw_sampler_bindful_heap_index_, sampler_count_partial_update, + sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle, + sampler_gpu_handle); + if (sampler_heap_index == + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + XELOGE("Failed to allocate sampler descriptors"); + return false; + } + } + if (draw_view_bindful_heap_index_ != view_heap_index) { + // Need to update all view descriptors. 
+ write_textures_vertex = texture_count_vertex != 0; + write_textures_pixel = texture_count_pixel != 0; + bindful_textures_written_vertex_ = false; + bindful_textures_written_pixel_ = false; + // If updating fully, write the shared memory SRV and UAV descriptors and, + // if needed, the EDRAM descriptor. + gpu_handle_shared_memory_and_edram_ = view_gpu_handle; + shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; + shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + if (edram_rov_used_) { + render_target_cache_->WriteEDRAMR32UintUAVDescriptor(view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_Bindful_SharedMemoryAndEDRAM); + } + if (sampler_heap_index != + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid && + draw_sampler_bindful_heap_index_ != sampler_heap_index) { + write_samplers_vertex = sampler_count_vertex != 0; + write_samplers_pixel = sampler_count_pixel != 0; + bindful_samplers_written_vertex_ = false; + bindful_samplers_written_pixel_ = false; } - current_graphics_root_up_to_date_ &= - ~(1u << kRootParameter_SharedMemoryAndEDRAM); - } - if (sampler_heap_index != ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid && - draw_sampler_heap_index_ != sampler_heap_index) { - write_samplers_vertex = sampler_count_vertex != 0; - write_samplers_pixel = sampler_count_pixel != 0; - samplers_written_vertex_ = false; - samplers_written_pixel_ = false; - } - // Write the descriptors. 
- D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_desc; - if (write_system_constant_view) { - gpu_handle_system_constants_ = view_gpu_handle; - constant_buffer_desc.BufferLocation = - cbuffer_bindings_system_.buffer_address; - constant_buffer_desc.SizeInBytes = - xe::align(uint32_t(sizeof(system_constants_)), 256u); - device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - current_graphics_root_up_to_date_ &= - ~(1u << kRootParameter_SystemConstants); - } - if (write_float_constant_view_vertex) { - gpu_handle_float_constants_vertex_ = view_gpu_handle; - constant_buffer_desc.BufferLocation = - cbuffer_bindings_float_vertex_.buffer_address; - constant_buffer_desc.SizeInBytes = float_constant_size_vertex; - device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - current_graphics_root_up_to_date_ &= - ~(1u << kRootParameter_FloatConstantsVertex); - } - if (write_float_constant_view_pixel) { - gpu_handle_float_constants_pixel_ = view_gpu_handle; - constant_buffer_desc.BufferLocation = - cbuffer_bindings_float_pixel_.buffer_address; - constant_buffer_desc.SizeInBytes = float_constant_size_pixel; - device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - current_graphics_root_up_to_date_ &= - ~(1u << kRootParameter_FloatConstantsPixel); - } - if (write_bool_loop_constant_view) { - gpu_handle_bool_loop_constants_ = view_gpu_handle; - constant_buffer_desc.BufferLocation = - cbuffer_bindings_bool_loop_.buffer_address; - constant_buffer_desc.SizeInBytes = 256; - device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - 
current_graphics_root_up_to_date_ &= - ~(1u << kRootParameter_BoolLoopConstants); - } - if (write_fetch_constant_view) { - gpu_handle_fetch_constants_ = view_gpu_handle; - constant_buffer_desc.BufferLocation = - cbuffer_bindings_fetch_.buffer_address; - constant_buffer_desc.SizeInBytes = 768; - device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; - current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants); - } - if (write_textures_vertex) { - assert_true(current_graphics_root_extras_.textures_vertex != - RootExtraParameterIndices::kUnavailable); - gpu_handle_textures_vertex_ = view_gpu_handle; - for (uint32_t i = 0; i < texture_count_vertex; ++i) { - const D3D12Shader::TextureSRV& srv = textures_vertex[i]; - texture_cache_->WriteTextureSRV(srv, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; + // Write the descriptors. 
+ if (write_textures_vertex) { + assert_true(current_graphics_root_bindful_extras_.textures_vertex != + RootBindfulExtraParameterIndices::kUnavailable); + gpu_handle_textures_vertex_ = view_gpu_handle; + for (uint32_t i = 0; i < texture_count_vertex; ++i) { + texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i], + view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + current_texture_layout_uid_vertex_ = texture_layout_uid_vertex; + current_texture_srv_keys_vertex_.resize( + std::max(current_texture_srv_keys_vertex_.size(), + size_t(texture_count_vertex))); + texture_cache_->WriteActiveTextureSRVKeys( + current_texture_srv_keys_vertex_.data(), textures_vertex, + texture_count_vertex); + bindful_textures_written_vertex_ = true; + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_bindful_extras_.textures_vertex); } - texture_bindings_written_vertex_ = true; - current_texture_bindings_hash_vertex_ = texture_bindings_hash_vertex; - current_graphics_root_up_to_date_ &= - ~(1u << current_graphics_root_extras_.textures_vertex); - } - if (write_textures_pixel) { - assert_true(current_graphics_root_extras_.textures_pixel != - RootExtraParameterIndices::kUnavailable); - gpu_handle_textures_pixel_ = view_gpu_handle; - for (uint32_t i = 0; i < texture_count_pixel; ++i) { - const D3D12Shader::TextureSRV& srv = textures_pixel[i]; - texture_cache_->WriteTextureSRV(srv, view_cpu_handle); - view_cpu_handle.ptr += descriptor_size_view; - view_gpu_handle.ptr += descriptor_size_view; + if (write_textures_pixel) { + assert_true(current_graphics_root_bindful_extras_.textures_pixel != + RootBindfulExtraParameterIndices::kUnavailable); + gpu_handle_textures_pixel_ = view_gpu_handle; + for (uint32_t i = 0; i < texture_count_pixel; ++i) { + texture_cache_->WriteActiveTextureBindfulSRV(textures_pixel[i], + view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += 
descriptor_size_view; + } + current_texture_layout_uid_pixel_ = texture_layout_uid_pixel; + current_texture_srv_keys_pixel_.resize(std::max( + current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel))); + texture_cache_->WriteActiveTextureSRVKeys( + current_texture_srv_keys_pixel_.data(), textures_pixel, + texture_count_pixel); + bindful_textures_written_pixel_ = true; + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_bindful_extras_.textures_pixel); } - texture_bindings_written_pixel_ = true; - current_texture_bindings_hash_pixel_ = texture_bindings_hash_pixel; - current_graphics_root_up_to_date_ &= - ~(1u << current_graphics_root_extras_.textures_pixel); - } - if (write_samplers_vertex) { - assert_true(current_graphics_root_extras_.samplers_vertex != - RootExtraParameterIndices::kUnavailable); - gpu_handle_samplers_vertex_ = sampler_gpu_handle; - for (uint32_t i = 0; i < sampler_count_vertex; ++i) { - texture_cache_->WriteSampler( - texture_cache_->GetSamplerParameters(samplers_vertex[i]), - sampler_cpu_handle); - sampler_cpu_handle.ptr += descriptor_size_sampler; - sampler_gpu_handle.ptr += descriptor_size_sampler; + if (write_samplers_vertex) { + assert_true(current_graphics_root_bindful_extras_.samplers_vertex != + RootBindfulExtraParameterIndices::kUnavailable); + gpu_handle_samplers_vertex_ = sampler_gpu_handle; + for (uint32_t i = 0; i < sampler_count_vertex; ++i) { + texture_cache_->WriteSampler(current_samplers_vertex_[i], + sampler_cpu_handle); + sampler_cpu_handle.ptr += descriptor_size_sampler; + sampler_gpu_handle.ptr += descriptor_size_sampler; + } + // Current samplers have already been updated. 
+ bindful_samplers_written_vertex_ = true; + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_bindful_extras_.samplers_vertex); } - samplers_written_vertex_ = true; - current_samplers_hash_vertex_ = samplers_hash_vertex; - current_graphics_root_up_to_date_ &= - ~(1u << current_graphics_root_extras_.samplers_vertex); - } - if (write_samplers_pixel) { - assert_true(current_graphics_root_extras_.samplers_pixel != - RootExtraParameterIndices::kUnavailable); - gpu_handle_samplers_pixel_ = sampler_gpu_handle; - for (uint32_t i = 0; i < sampler_count_pixel; ++i) { - texture_cache_->WriteSampler( - texture_cache_->GetSamplerParameters(samplers_pixel[i]), - sampler_cpu_handle); - sampler_cpu_handle.ptr += descriptor_size_sampler; - sampler_gpu_handle.ptr += descriptor_size_sampler; + if (write_samplers_pixel) { + assert_true(current_graphics_root_bindful_extras_.samplers_pixel != + RootBindfulExtraParameterIndices::kUnavailable); + gpu_handle_samplers_pixel_ = sampler_gpu_handle; + for (uint32_t i = 0; i < sampler_count_pixel; ++i) { + texture_cache_->WriteSampler(current_samplers_pixel_[i], + sampler_cpu_handle); + sampler_cpu_handle.ptr += descriptor_size_sampler; + sampler_gpu_handle.ptr += descriptor_size_sampler; + } + // Current samplers have already been updated. + bindful_samplers_written_pixel_ = true; + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_bindful_extras_.samplers_pixel); } - samplers_written_pixel_ = true; - current_samplers_hash_pixel_ = samplers_hash_pixel; - current_graphics_root_up_to_date_ &= - ~(1u << current_graphics_root_extras_.samplers_pixel); - } - // Wrote new descriptors on the current page. - draw_view_heap_index_ = view_heap_index; - if (sampler_heap_index != ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - draw_sampler_heap_index_ = sampler_heap_index; + // Wrote new descriptors on the current page. 
+ draw_view_bindful_heap_index_ = view_heap_index; + if (sampler_heap_index != + ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + draw_sampler_bindful_heap_index_ = sampler_heap_index; + } } // Update the root parameters. if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_FetchConstants))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_FetchConstants, gpu_handle_fetch_constants_); - current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants; + (1u << root_parameter_fetch_constants))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + root_parameter_fetch_constants, cbuffer_binding_fetch_.address); + current_graphics_root_up_to_date_ |= 1u << root_parameter_fetch_constants; } if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_FloatConstantsVertex))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_FloatConstantsVertex, - gpu_handle_float_constants_vertex_); + (1u << root_parameter_float_constants_vertex))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + root_parameter_float_constants_vertex, + cbuffer_binding_float_vertex_.address); + current_graphics_root_up_to_date_ |= + 1u << root_parameter_float_constants_vertex; + } + if (!(current_graphics_root_up_to_date_ & + (1u << root_parameter_float_constants_pixel))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + root_parameter_float_constants_pixel, + cbuffer_binding_float_pixel_.address); + current_graphics_root_up_to_date_ |= + 1u << root_parameter_float_constants_pixel; + } + if (!(current_graphics_root_up_to_date_ & + (1u << root_parameter_system_constants))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + root_parameter_system_constants, cbuffer_binding_system_.address); + current_graphics_root_up_to_date_ |= 1u << root_parameter_system_constants; + } + if (!(current_graphics_root_up_to_date_ & + (1u << 
root_parameter_bool_loop_constants))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + root_parameter_bool_loop_constants, cbuffer_binding_bool_loop_.address); current_graphics_root_up_to_date_ |= 1u - << kRootParameter_FloatConstantsVertex; + << root_parameter_bool_loop_constants; } - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_FloatConstantsPixel))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_FloatConstantsPixel, gpu_handle_float_constants_pixel_); - current_graphics_root_up_to_date_ |= 1u - << kRootParameter_FloatConstantsPixel; - } - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_SystemConstants))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_SystemConstants, gpu_handle_system_constants_); - current_graphics_root_up_to_date_ |= 1u << kRootParameter_SystemConstants; - } - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_BoolLoopConstants))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_BoolLoopConstants, gpu_handle_bool_loop_constants_); - current_graphics_root_up_to_date_ |= 1u << kRootParameter_BoolLoopConstants; - } - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_SharedMemoryAndEDRAM))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - kRootParameter_SharedMemoryAndEDRAM, - gpu_handle_shared_memory_and_edram_); - current_graphics_root_up_to_date_ |= 1u - << kRootParameter_SharedMemoryAndEDRAM; - } - uint32_t extra_index; - extra_index = current_graphics_root_extras_.textures_pixel; - if (extra_index != RootExtraParameterIndices::kUnavailable && - !(current_graphics_root_up_to_date_ & (1u << extra_index))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - extra_index, gpu_handle_textures_pixel_); - current_graphics_root_up_to_date_ |= 1u << extra_index; - } - extra_index = current_graphics_root_extras_.samplers_pixel; - if 
(extra_index != RootExtraParameterIndices::kUnavailable && - !(current_graphics_root_up_to_date_ & (1u << extra_index))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - extra_index, gpu_handle_samplers_pixel_); - current_graphics_root_up_to_date_ |= 1u << extra_index; - } - extra_index = current_graphics_root_extras_.textures_vertex; - if (extra_index != RootExtraParameterIndices::kUnavailable && - !(current_graphics_root_up_to_date_ & (1u << extra_index))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - extra_index, gpu_handle_textures_vertex_); - current_graphics_root_up_to_date_ |= 1u << extra_index; - } - extra_index = current_graphics_root_extras_.samplers_vertex; - if (extra_index != RootExtraParameterIndices::kUnavailable && - !(current_graphics_root_up_to_date_ & (1u << extra_index))) { - deferred_command_list_->D3DSetGraphicsRootDescriptorTable( - extra_index, gpu_handle_samplers_vertex_); - current_graphics_root_up_to_date_ |= 1u << extra_index; + if (bindless_resources_used_) { + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + kRootParameter_Bindless_DescriptorIndicesPixel, + cbuffer_binding_descriptor_indices_pixel_.address); + current_graphics_root_up_to_date_ |= + 1u << kRootParameter_Bindless_DescriptorIndicesPixel; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_Bindless_DescriptorIndicesVertex))) { + deferred_command_list_->D3DSetGraphicsRootConstantBufferView( + kRootParameter_Bindless_DescriptorIndicesVertex, + cbuffer_binding_descriptor_indices_vertex_.address); + current_graphics_root_up_to_date_ |= + 1u << kRootParameter_Bindless_DescriptorIndicesVertex; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_Bindless_SamplerHeap))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + kRootParameter_Bindless_SamplerHeap, + 
sampler_bindless_heap_gpu_start_); + current_graphics_root_up_to_date_ |= + 1u << kRootParameter_Bindless_SamplerHeap; + } + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_Bindless_ViewHeap))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + kRootParameter_Bindless_ViewHeap, view_bindless_heap_gpu_start_); + current_graphics_root_up_to_date_ |= 1u + << kRootParameter_Bindless_ViewHeap; + } + } else { + if (!(current_graphics_root_up_to_date_ & + (1u << kRootParameter_Bindful_SharedMemoryAndEDRAM))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + kRootParameter_Bindful_SharedMemoryAndEDRAM, + gpu_handle_shared_memory_and_edram_); + current_graphics_root_up_to_date_ |= + 1u << kRootParameter_Bindful_SharedMemoryAndEDRAM; + } + uint32_t extra_index; + extra_index = current_graphics_root_bindful_extras_.textures_pixel; + if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + extra_index, gpu_handle_textures_pixel_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_bindful_extras_.samplers_pixel; + if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + extra_index, gpu_handle_samplers_pixel_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_bindful_extras_.textures_vertex; + if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + extra_index, gpu_handle_textures_vertex_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_bindful_extras_.samplers_vertex; + 
if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + deferred_command_list_->D3DSetGraphicsRootDescriptorTable( + extra_index, gpu_handle_samplers_vertex_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } } return true; @@ -3494,6 +4204,20 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { return readback_buffer_; } +void D3D12CommandProcessor::WriteGammaRampSRV( + bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const { + auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); + D3D12_SHADER_RESOURCE_VIEW_DESC desc; + desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + // 256-entry for normal, 128-entry for PWL. + desc.Texture1D.MostDetailedMip = is_pwl ? 1 : 0; + desc.Texture1D.MipLevels = 1; + desc.Texture1D.ResourceMinLODClamp = 0.0f; + device->CreateShaderResourceView(gamma_ramp_texture_, &desc, handle); +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 0442ff2c4..0f6321002 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -29,6 +29,7 @@ #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/d3d12/d3d12_context.h" +#include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/d3d12/pools.h" namespace xe { @@ -53,8 +54,8 @@ class D3D12CommandProcessor : public CommandProcessor { void RestoreEDRAMSnapshot(const void* snapshot) override; // Needed by everything that owns transient objects. 
- xe::ui::d3d12::D3D12Context* GetD3D12Context() const { - return static_cast(context_.get()); + ui::d3d12::D3D12Context* GetD3D12Context() const { + return static_cast(context_.get()); } // Returns the deferred drawing command list for the currently open @@ -95,18 +96,43 @@ class D3D12CommandProcessor : public CommandProcessor { ui::d3d12::UploadBufferPool* GetConstantBufferPool() const { return constant_buffer_pool_.get(); } - // Request and automatically rebind descriptors on the draw command list. - // Refer to DescriptorHeapPool::Request for partial/full update explanation. - uint64_t RequestViewDescriptors(uint64_t previous_heap_index, - uint32_t count_for_partial_update, - uint32_t count_for_full_update, - D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, - D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); - uint64_t RequestSamplerDescriptors( - uint64_t previous_heap_index, uint32_t count_for_partial_update, - uint32_t count_for_full_update, - D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, - D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); + + D3D12_CPU_DESCRIPTOR_HANDLE GetViewBindlessHeapCPUStart() const { + assert_true(bindless_resources_used_); + return view_bindless_heap_cpu_start_; + } + D3D12_GPU_DESCRIPTOR_HANDLE GetViewBindlessHeapGPUStart() const { + assert_true(bindless_resources_used_); + return view_bindless_heap_gpu_start_; + } + // Returns UINT32_MAX if no free descriptors. + uint32_t RequestPersistentViewBindlessDescriptor(); + void ReleaseViewBindlessDescriptorImmediately(uint32_t descriptor_index); + // Request non-contiguous SRV/UAV descriptors for use only within the next + // draw or dispatch command done for internal purposes. May change the current + // descriptor heap. + bool RequestOneUseSingleViewDescriptors( + uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out); + // These are needed often, so they are always allocated. 
+ enum class SystemBindlessView : uint32_t { + kNullTexture2DArray, + kNullTexture3D, + kNullTextureCube, + + kSharedMemoryRawSRV, + kSharedMemoryRawUAV, + + kEDRAMR32UintUAV, + kEDRAMRawSRV, + kEDRAMRawUAV, + + kGammaRampNormalSRV, + kGammaRampPWLSRV, + + kCount, + }; + ui::d3d12::util::DescriptorCPUGPUHandlePair GetSystemBindlessViewHandlePair( + SystemBindlessView view) const; // Returns a single temporary GPU-side buffer within a submission for tasks // like texture untiling and resolving. @@ -148,6 +174,10 @@ class D3D12CommandProcessor : public CommandProcessor { bool changing_viewport = true, bool changing_blend_factor = false, bool changing_stencil_ref = false); + // For the pipeline state cache to call when binding layout UIDs may be + // reused. + void NotifyShaderBindingsLayoutUIDsInvalidated(); + // Returns the text to display in the GPU backend name in the window title. std::string GetWindowTitleText() const; @@ -180,36 +210,66 @@ class D3D12CommandProcessor : public CommandProcessor { static constexpr uint32_t kQueueFrames = 3; enum RootParameter : UINT { + // Keep the size of the root signature at each stage 13 dwords or less + // (better 12 or less) so it fits in user data on AMD. Descriptor tables are + // 1 dword, root descriptors are 2 dwords (however, root descriptors require + // less setup on the CPU - balance needs to be maintained). + + // CBVs are set in both bindful and bindless cases via root descriptors. + + // - Bindful resources - multiple root signatures depending on extra + // parameters. + // These are always present. // Very frequently changed, especially for UI draws, and for models drawn in // multiple parts - contains vertex and texture fetch constants. - kRootParameter_FetchConstants, + kRootParameter_Bindful_FetchConstants = 0, // +2 dwords = 2 in all. // Quite frequently changed (for one object drawn multiple times, for // instance - may contain projection matrices). 
- kRootParameter_FloatConstantsVertex, + kRootParameter_Bindful_FloatConstantsVertex, // +2 = 4 in VS. // Less frequently changed (per-material). - kRootParameter_FloatConstantsPixel, - // Rarely changed - system constants like viewport and alpha testing. - kRootParameter_SystemConstants, + kRootParameter_Bindful_FloatConstantsPixel, // +2 = 4 in PS. + // May stay the same across many draws. + kRootParameter_Bindful_SystemConstants, // +2 = 6 in all. // Pretty rarely used and rarely changed - flow control constants. - kRootParameter_BoolLoopConstants, + kRootParameter_Bindful_BoolLoopConstants, // +2 = 8 in all. // Never changed except for when starting a new descriptor heap - shared - // memory byte address buffer, and, if ROV is used for EDRAM, EDRAM UAV. - kRootParameter_SharedMemoryAndEDRAM, + // memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT + // UAV. + // SRV/UAV descriptor table. + kRootParameter_Bindful_SharedMemoryAndEDRAM, // +1 = 9 in all. - kRootParameter_Count_Base, + kRootParameter_Bindful_Count_Base, // Extra parameter that may or may not exist: - // - Pixel textures (t1+). - // - Pixel samplers (s0+). - // - Vertex textures (t1+). - // - Vertex samplers (s0+). + // - Pixel textures (+1 = 10 in PS). + // - Pixel samplers (+1 = 11 in PS). + // - Vertex textures (+1 = 10 in VS). + // - Vertex samplers (+1 = 11 in VS). - kRootParameter_Count_Max = kRootParameter_Count_Base + 4, + kRootParameter_Bindful_Count_Max = kRootParameter_Bindful_Count_Base + 4, + + // - Bindless resources - two global root signatures (for non-tessellated + // and tessellated drawing), so these are always present. + + kRootParameter_Bindless_FetchConstants = 0, // +2 = 2 in all. + kRootParameter_Bindless_FloatConstantsVertex, // +2 = 4 in VS. + kRootParameter_Bindless_FloatConstantsPixel, // +2 = 4 in PS. + // Changed per-material, texture and sampler descriptor indices. + kRootParameter_Bindless_DescriptorIndicesPixel, // +2 = 6 in PS. 
+ kRootParameter_Bindless_DescriptorIndicesVertex, // +2 = 6 in VS. + kRootParameter_Bindless_SystemConstants, // +2 = 8 in all. + kRootParameter_Bindless_BoolLoopConstants, // +2 = 10 in all. + // Unbounded sampler descriptor table - changed in case of overflow. + kRootParameter_Bindless_SamplerHeap, // +1 = 11 in all. + // Unbounded SRV/UAV descriptor table - never changed. + kRootParameter_Bindless_ViewHeap, // +1 = 12 in all. + + kRootParameter_Bindless_Count, }; - struct RootExtraParameterIndices { + struct RootBindfulExtraParameterIndices { uint32_t textures_pixel; uint32_t samplers_pixel; uint32_t textures_vertex; @@ -218,9 +278,9 @@ class D3D12CommandProcessor : public CommandProcessor { }; // Gets the indices of optional root parameters. Returns the total parameter // count. - static uint32_t GetRootExtraParameterIndices( + static uint32_t GetRootBindfulExtraParameterIndices( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - RootExtraParameterIndices& indices_out); + RootBindfulExtraParameterIndices& indices_out); // BeginSubmission and EndSubmission may be called at any time. If there's an // open non-frame submission, BeginSubmission(true) will promote it to a @@ -247,6 +307,20 @@ class D3D12CommandProcessor : public CommandProcessor { // Need to await submission completion before calling. void ClearCommandAllocatorCache(); + // Request descriptors and automatically rebind the descriptor heap on the + // draw command list. Refer to DescriptorHeapPool::Request for partial/full + // update explanation. Doesn't work when bindless descriptors are used. 
+ uint64_t RequestViewBindfulDescriptors( + uint64_t previous_heap_index, uint32_t count_for_partial_update, + uint32_t count_for_full_update, + D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); + uint64_t RequestSamplerBindfulDescriptors( + uint64_t previous_heap_index, uint32_t count_for_partial_update, + uint32_t count_for_full_update, + D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, + D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out); + void UpdateFixedFunctionState(bool primitive_two_faced); void UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_two_faced, @@ -268,6 +342,8 @@ class D3D12CommandProcessor : public CommandProcessor { // synchronizing immediately after use. Always in COPY_DEST state. ID3D12Resource* RequestReadbackBuffer(uint32_t size); + void WriteGammaRampSRV(bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const; + bool cache_clear_requested_ = false; bool submission_open_ = false; @@ -298,28 +374,89 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12GraphicsCommandList1* command_list_1_ = nullptr; std::unique_ptr deferred_command_list_ = nullptr; - std::unique_ptr shared_memory_ = nullptr; - - // Root signatures for different descriptor counts. - std::unordered_map root_signatures_; - - std::unique_ptr pipeline_cache_ = nullptr; - + // Should bindless textures and samplers be used - many times faster + // UpdateBindings than bindful (that becomes a significant bottleneck with + // bindful - mainly because of CopyDescriptorsSimple, which takes the majority + // of UpdateBindings time, and that's outside the emulator's control even). + bool bindless_resources_used_ = false; // Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion // and blending performed in pixel shaders be used instead of host render // targets. 
bool edram_rov_used_ = false; + std::unique_ptr constant_buffer_pool_ = nullptr; + + static constexpr uint32_t kViewBindfulHeapSize = 32768; + static_assert(kViewBindfulHeapSize <= + D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1); + std::unique_ptr view_bindful_heap_pool_ = + nullptr; + // Currently bound descriptor heap - updated by RequestViewBindfulDescriptors. + ID3D12DescriptorHeap* view_bindful_heap_current_; + // Rationale: textures have 4 KB alignment in guest memory, and there can be + // 512 MB / 4 KB in total of them at most, and multiply by 3 for different + // swizzles, signedness, and multiple host textures for one guest texture, and + // transient descriptors. Though in reality there will be a lot fewer of + // course, this is just a "safe" value. The limit is 1000000 for resource + // binding tier 2. + static constexpr uint32_t kViewBindlessHeapSize = 262144; + static_assert(kViewBindlessHeapSize <= + D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2); + ID3D12DescriptorHeap* view_bindless_heap_ = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE view_bindless_heap_cpu_start_; + D3D12_GPU_DESCRIPTOR_HANDLE view_bindless_heap_gpu_start_; + uint32_t view_bindless_heap_allocated_ = 0; + std::vector view_bindless_heap_free_; + // , sorted by the submission + // number. + std::deque> view_bindless_one_use_descriptors_; + + // Direct3D 12 only allows shader-visible heaps with no more than 2048 + // samplers (due to Nvidia addressing). However, there's also possibly a weird + // bug in the Nvidia driver (tested on 440.97 and earlier on Windows 10 1803) + // that caused the sampler with index 2047 not to work if a heap with 8 or + // less samplers also exists - in case of Xenia, it's the immediate drawer's + // sampler heap. + // FIXME(Triang3l): Investigate the issue with the sampler 2047 on Nvidia. 
+ static constexpr uint32_t kSamplerHeapSize = 2000; + static_assert(kSamplerHeapSize <= D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); + std::unique_ptr sampler_bindful_heap_pool_ = + nullptr; + ID3D12DescriptorHeap* sampler_bindful_heap_current_; + ID3D12DescriptorHeap* sampler_bindless_heap_current_ = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE sampler_bindless_heap_cpu_start_; + D3D12_GPU_DESCRIPTOR_HANDLE sampler_bindless_heap_gpu_start_; + // Currently the sampler heap is used only for texture cache samplers, so + // individual samplers are never freed, and using a simple linear allocator + // inside the current heap without a free list. + uint32_t sampler_bindless_heap_allocated_ = 0; + // , if total sampler count used so far + // exceeds kSamplerHeapSize, and the heap has been switched (this is not a + // totally impossible situation considering Direct3D 9 has sampler parameter + // state instead of sampler objects, and having one "unimportant" parameter + // changed may result in doubling of sampler count). Sorted by the submission + // number (so checking if the first can be reused is enough). + std::deque> + sampler_bindless_heaps_overflowed_; + // TextureCache::SamplerParameters::value -> indices within the current + // bindless sampler heap. + std::unordered_map texture_cache_bindless_sampler_map_; + + // Root signatures for different descriptor counts. 
+ std::unordered_map root_signatures_bindful_; + ID3D12RootSignature* root_signature_bindless_vs_ = nullptr; + ID3D12RootSignature* root_signature_bindless_ds_ = nullptr; + + std::unique_ptr shared_memory_ = nullptr; + + std::unique_ptr pipeline_cache_ = nullptr; + std::unique_ptr texture_cache_ = nullptr; std::unique_ptr render_target_cache_ = nullptr; std::unique_ptr primitive_converter_ = nullptr; - std::unique_ptr constant_buffer_pool_ = nullptr; - std::unique_ptr view_heap_pool_ = nullptr; - std::unique_ptr sampler_heap_pool_ = nullptr; - // Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL // ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D. ID3D12Resource* gamma_ramp_texture_ = nullptr; @@ -348,11 +485,8 @@ class D3D12CommandProcessor : public CommandProcessor { // Unsubmitted barrier batch. std::vector barriers_; - struct BufferForDeletion { - ID3D12Resource* buffer; - uint64_t last_usage_submission; - }; - std::deque buffers_for_deletion_; + // , sorted by the submission number. + std::deque> buffers_for_deletion_; static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024; ID3D12Resource* scratch_buffer_ = nullptr; @@ -390,18 +524,12 @@ class D3D12CommandProcessor : public CommandProcessor { // Currently bound graphics root signature. ID3D12RootSignature* current_graphics_root_signature_; // Extra parameters which may or may not be present. - RootExtraParameterIndices current_graphics_root_extras_; + RootBindfulExtraParameterIndices current_graphics_root_bindful_extras_; // Whether root parameters are up to date - reset if a new signature is bound. uint32_t current_graphics_root_up_to_date_; - // Currently bound descriptor heaps - update by RequestViewDescriptors and - // RequestSamplerDescriptors. - ID3D12DescriptorHeap* current_view_heap_; - ID3D12DescriptorHeap* current_sampler_heap_; - // System shader constants. 
DxbcShaderTranslator::SystemConstants system_constants_; - ColorRenderTargetFormat system_constants_color_formats_[4]; // Float constant usage masks of the last draw call. uint64_t current_float_constant_map_vertex_[4]; @@ -409,45 +537,48 @@ class D3D12CommandProcessor : public CommandProcessor { // Constant buffer bindings. struct ConstantBufferBinding { - D3D12_GPU_VIRTUAL_ADDRESS buffer_address; + D3D12_GPU_VIRTUAL_ADDRESS address; bool up_to_date; }; - ConstantBufferBinding cbuffer_bindings_system_; - ConstantBufferBinding cbuffer_bindings_float_vertex_; - ConstantBufferBinding cbuffer_bindings_float_pixel_; - ConstantBufferBinding cbuffer_bindings_bool_loop_; - ConstantBufferBinding cbuffer_bindings_fetch_; + ConstantBufferBinding cbuffer_binding_system_; + ConstantBufferBinding cbuffer_binding_float_vertex_; + ConstantBufferBinding cbuffer_binding_float_pixel_; + ConstantBufferBinding cbuffer_binding_bool_loop_; + ConstantBufferBinding cbuffer_binding_fetch_; + ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_; + ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_; // Pages with the descriptors currently used for handling Xenos draw calls. - uint64_t draw_view_heap_index_; - uint64_t draw_sampler_heap_index_; + uint64_t draw_view_bindful_heap_index_; + uint64_t draw_sampler_bindful_heap_index_; - // Whether the last used texture bindings have been written to the current - // view descriptor heap. - bool texture_bindings_written_vertex_; - bool texture_bindings_written_pixel_; - // Hashes of the last texture bindings written to the current view descriptor - // heap with the last used descriptor layout. Valid only when the - // corresponding "written" variables are true. - uint64_t current_texture_bindings_hash_vertex_; - uint64_t current_texture_bindings_hash_pixel_; + // Whether the last used texture sampler bindings have been written to the + // current view descriptor heap. 
+ bool bindful_textures_written_vertex_; + bool bindful_textures_written_pixel_; + bool bindful_samplers_written_vertex_; + bool bindful_samplers_written_pixel_; + // Layout UIDs and last texture and sampler bindings written to the current + // descriptor heaps (for bindful) or descriptor index constant buffer (for + // bindless) with the last used descriptor layout. Valid only when: + // - For bindful, when bindful_#_written_#_ is true. + // - For bindless, when cbuffer_binding_descriptor_indices_#_.up_to_date is + // true. + size_t current_texture_layout_uid_vertex_; + size_t current_texture_layout_uid_pixel_; + size_t current_sampler_layout_uid_vertex_; + size_t current_sampler_layout_uid_pixel_; + // Size of these should be ignored when checking whether these are up to date, + // layout UID should be checked first (they will be different for different + // binding counts). + std::vector current_texture_srv_keys_vertex_; + std::vector current_texture_srv_keys_pixel_; + std::vector current_samplers_vertex_; + std::vector current_samplers_pixel_; + std::vector current_sampler_bindless_indices_vertex_; + std::vector current_sampler_bindless_indices_pixel_; - // Whether the last used samplers have been written to the current sampler - // descriptor heap. - bool samplers_written_vertex_; - bool samplers_written_pixel_; - // Hashes of the last sampler parameters written to the current sampler - // descriptor heap with the last used descriptor layout. Valid only when the - // corresponding "written" variables are true. - uint64_t current_samplers_hash_vertex_; - uint64_t current_samplers_hash_pixel_; - - // Latest descriptor handles used for handling Xenos draw calls. 
- D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_system_constants_; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_vertex_; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_pixel_; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_bool_loop_constants_; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_; + // Latest bindful descriptor handles used for handling Xenos draw calls. D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_; diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index 2d929c3e7..900908f0e 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" +#include + #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/gpu/gpu_flags.h" @@ -18,8 +20,8 @@ namespace xe { namespace gpu { namespace d3d12 { -constexpr uint32_t D3D12Shader::kMaxTextureSRVIndexBits; -constexpr uint32_t D3D12Shader::kMaxTextureSRVs; +constexpr uint32_t D3D12Shader::kMaxTextureBindingIndexBits; +constexpr uint32_t D3D12Shader::kMaxTextureBindings; constexpr uint32_t D3D12Shader::kMaxSamplerBindingIndexBits; constexpr uint32_t D3D12Shader::kMaxSamplerBindings; @@ -28,34 +30,40 @@ D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash, : Shader(shader_type, data_hash, dword_ptr, dword_count) {} void D3D12Shader::SetTexturesAndSamplers( - const DxbcShaderTranslator::TextureSRV* texture_srvs, - uint32_t texture_srv_count, + const DxbcShaderTranslator::TextureBinding* texture_bindings, + uint32_t texture_binding_count, const DxbcShaderTranslator::SamplerBinding* sampler_bindings, uint32_t sampler_binding_count) { - texture_srvs_.clear(); - texture_srvs_.reserve(texture_srv_count); + texture_bindings_.clear(); + texture_bindings_.reserve(texture_binding_count); used_texture_mask_ = 0; - for 
(uint32_t i = 0; i < texture_srv_count; ++i) { - TextureSRV srv; - const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i]; - srv.fetch_constant = translator_srv.fetch_constant; - srv.dimension = translator_srv.dimension; - srv.is_signed = translator_srv.is_signed; - texture_srvs_.push_back(srv); - used_texture_mask_ |= 1u << translator_srv.fetch_constant; + for (uint32_t i = 0; i < texture_binding_count; ++i) { + TextureBinding& binding = texture_bindings_.emplace_back(); + // For a stable hash. + std::memset(&binding, 0, sizeof(binding)); + const DxbcShaderTranslator::TextureBinding& translator_binding = + texture_bindings[i]; + binding.bindless_descriptor_index = + translator_binding.bindless_descriptor_index; + binding.fetch_constant = translator_binding.fetch_constant; + binding.dimension = translator_binding.dimension; + binding.is_signed = translator_binding.is_signed; + used_texture_mask_ |= 1u << translator_binding.fetch_constant; } sampler_bindings_.clear(); sampler_bindings_.reserve(sampler_binding_count); for (uint32_t i = 0; i < sampler_binding_count; ++i) { - SamplerBinding sampler; - const DxbcShaderTranslator::SamplerBinding& translator_sampler = + SamplerBinding binding; + const DxbcShaderTranslator::SamplerBinding& translator_binding = sampler_bindings[i]; - sampler.fetch_constant = translator_sampler.fetch_constant; - sampler.mag_filter = translator_sampler.mag_filter; - sampler.min_filter = translator_sampler.min_filter; - sampler.mip_filter = translator_sampler.mip_filter; - sampler.aniso_filter = translator_sampler.aniso_filter; - sampler_bindings_.push_back(sampler); + binding.bindless_descriptor_index = + translator_binding.bindless_descriptor_index; + binding.fetch_constant = translator_binding.fetch_constant; + binding.mag_filter = translator_binding.mag_filter; + binding.min_filter = translator_binding.min_filter; + binding.mip_filter = translator_binding.mip_filter; + binding.aniso_filter = 
translator_binding.aniso_filter; + sampler_bindings_.push_back(binding); } } diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index e6e9b3079..d2aaf0389 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -26,8 +26,8 @@ class D3D12Shader : public Shader { const uint32_t* dword_ptr, uint32_t dword_count); void SetTexturesAndSamplers( - const DxbcShaderTranslator::TextureSRV* texture_srvs, - uint32_t texture_srv_count, + const DxbcShaderTranslator::TextureBinding* texture_bindings, + uint32_t texture_binding_count, const DxbcShaderTranslator::SamplerBinding* sampler_bindings, uint32_t sampler_binding_count); @@ -44,18 +44,22 @@ class D3D12Shader : public Shader { bool DisassembleDxbc(const ui::d3d12::D3D12Provider* provider); - static constexpr uint32_t kMaxTextureSRVIndexBits = - DxbcShaderTranslator::kMaxTextureSRVIndexBits; - static constexpr uint32_t kMaxTextureSRVs = - DxbcShaderTranslator::kMaxTextureSRVs; - struct TextureSRV { + static constexpr uint32_t kMaxTextureBindingIndexBits = + DxbcShaderTranslator::kMaxTextureBindingIndexBits; + static constexpr uint32_t kMaxTextureBindings = + DxbcShaderTranslator::kMaxTextureBindings; + struct TextureBinding { + uint32_t bindless_descriptor_index; uint32_t fetch_constant; + // Stacked and 3D are separate TextureBindings, even for bindless for null + // descriptor handling simplicity. TextureDimension dimension; bool is_signed; }; - const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { - count_out = uint32_t(texture_srvs_.size()); - return texture_srvs_.data(); + // Safe to hash and compare with memcmp for layout hashing. 
+ const TextureBinding* GetTextureBindings(uint32_t& count_out) const { + count_out = uint32_t(texture_bindings_.size()); + return texture_bindings_.data(); } const uint32_t GetUsedTextureMask() const { return used_texture_mask_; } @@ -64,6 +68,7 @@ class D3D12Shader : public Shader { static constexpr uint32_t kMaxSamplerBindings = DxbcShaderTranslator::kMaxSamplerBindings; struct SamplerBinding { + uint32_t bindless_descriptor_index; uint32_t fetch_constant; TextureFilter mag_filter; TextureFilter min_filter; @@ -75,10 +80,29 @@ class D3D12Shader : public Shader { return sampler_bindings_.data(); } + // For owning subsystems like the pipeline state cache, accessors for unique + // identifiers (used instead of hashes to make sure collisions can't happen) + // of binding layouts used by the shader, for invalidation if a shader with an + // incompatible layout was bound. + size_t GetTextureBindingLayoutUserUID() const { + return texture_binding_layout_user_uid_; + } + void SetTextureBindingLayoutUserUID(size_t uid) { + texture_binding_layout_user_uid_ = uid; + } + size_t GetSamplerBindingLayoutUserUID() const { + return sampler_binding_layout_user_uid_; + } + void SetSamplerBindingLayoutUserUID(size_t uid) { + sampler_binding_layout_user_uid_ = uid; + } + private: - std::vector texture_srvs_; - uint32_t used_texture_mask_ = 0; + std::vector texture_bindings_; std::vector sampler_bindings_; + size_t texture_binding_layout_user_uid_ = 0; + size_t sampler_binding_layout_user_uid_ = 0; + uint32_t used_texture_mask_ = 0; std::vector forced_early_z_shader_; }; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 83f901a4e..e35b72571 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -61,19 +61,22 @@ namespace d3d12 { #include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h" #include "xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h" +constexpr size_t 
PipelineCache::kLayoutUIDEmpty; constexpr uint32_t PipelineCache::PipelineDescription::kVersion; PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, - RegisterFile* register_file, bool edram_rov_used, + RegisterFile* register_file, + bool bindless_resources_used, bool edram_rov_used, uint32_t resolution_scale) : command_processor_(command_processor), register_file_(register_file), + bindless_resources_used_(bindless_resources_used), edram_rov_used_(edram_rov_used), resolution_scale_(resolution_scale) { auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); shader_translator_ = std::make_unique( - provider->GetAdapterVendorID(), edram_rov_used_, + provider->GetAdapterVendorID(), bindless_resources_used_, edram_rov_used_, provider->GetGraphicsAnalysis() != nullptr); if (edram_rov_used_) { @@ -178,6 +181,13 @@ void PipelineCache::ClearCache(bool shutting_down) { COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0); // Destroy all shaders. + command_processor_->NotifyShaderBindingsLayoutUIDsInvalidated(); + if (bindless_resources_used_) { + bindless_sampler_layout_map_.clear(); + bindless_sampler_layouts_.clear(); + } + texture_binding_layout_map_.clear(); + texture_binding_layouts_.clear(); for (auto it : shader_map_) { delete it.second; } @@ -264,8 +274,8 @@ void PipelineCache::InitializeShaderStorage( auto shader_translation_thread_function = [&]() { auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); DxbcShaderTranslator translator( - provider->GetAdapterVendorID(), edram_rov_used_, - provider->GetGraphicsAnalysis() != nullptr); + provider->GetAdapterVendorID(), bindless_resources_used_, + edram_rov_used_, provider->GetGraphicsAnalysis() != nullptr); for (;;) { std::pair shader_to_translate; for (;;) { @@ -287,11 +297,11 @@ void PipelineCache::InitializeShaderStorage( translator, shader_to_translate.second, shader_to_translate.first.sq_program_cntl, 
shader_to_translate.first.host_vertex_shader_type)) { - std::unique_lock lock(shaders_failed_to_translate_mutex); + std::lock_guard lock(shaders_failed_to_translate_mutex); shaders_failed_to_translate.push_back(shader_to_translate.second); } { - std::unique_lock lock(shaders_translation_thread_mutex); + std::lock_guard lock(shaders_translation_thread_mutex); --shader_translation_threads_busy; } } @@ -340,7 +350,7 @@ void PipelineCache::InitializeShaderStorage( // one. size_t shader_translation_threads_needed; { - std::unique_lock lock(shaders_translation_thread_mutex); + std::lock_guard lock(shaders_translation_thread_mutex); shader_translation_threads_needed = std::min(shader_translation_threads_busy + shaders_to_translate.size() + size_t(1), @@ -353,7 +363,7 @@ void PipelineCache::InitializeShaderStorage( shader_translation_threads.back()->set_name("Shader Translation"); } { - std::unique_lock lock(shaders_translation_thread_mutex); + std::lock_guard lock(shaders_translation_thread_mutex); shaders_to_translate.emplace_back(shader_header, shader); } shaders_translation_thread_cond.notify_one(); @@ -362,7 +372,7 @@ void PipelineCache::InitializeShaderStorage( } if (!shader_translation_threads.empty()) { { - std::unique_lock lock(shaders_translation_thread_mutex); + std::lock_guard lock(shaders_translation_thread_mutex); shader_translation_threads_shutdown = true; } shaders_translation_thread_cond.notify_all(); @@ -662,7 +672,7 @@ void PipelineCache::EndSubmission() { if (shader_storage_file_flush_needed_ || pipeline_state_storage_file_flush_needed_) { { - std::unique_lock lock(storage_write_request_lock_); + std::lock_guard lock(storage_write_request_lock_); if (shader_storage_file_flush_needed_) { storage_write_flush_shaders_ = true; } @@ -955,47 +965,165 @@ bool PipelineCache::TranslateShader( return false; } - uint32_t texture_srv_count; - const DxbcShaderTranslator::TextureSRV* texture_srvs = - translator.GetTextureSRVs(texture_srv_count); + const char* 
host_shader_type; + if (shader->type() == ShaderType::kVertex) { + switch (shader->host_vertex_shader_type()) { + case Shader::HostVertexShaderType::kLineDomainCPIndexed: + host_shader_type = "control-point-indexed line domain"; + break; + case Shader::HostVertexShaderType::kLineDomainPatchIndexed: + host_shader_type = "patch-indexed line domain"; + break; + case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: + host_shader_type = "control-point-indexed triangle domain"; + break; + case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: + host_shader_type = "patch-indexed triangle domain"; + break; + case Shader::HostVertexShaderType::kQuadDomainCPIndexed: + host_shader_type = "control-point-indexed quad domain"; + break; + case Shader::HostVertexShaderType::kQuadDomainPatchIndexed: + host_shader_type = "patch-indexed quad domain"; + break; + default: + host_shader_type = "vertex"; + } + } else { + host_shader_type = "pixel"; + } + XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n", host_shader_type, + shader->ucode_dword_count() * 4, shader->ucode_data_hash(), + shader->ucode_disassembly().c_str()); + + // Set up texture and sampler bindings. 
+ uint32_t texture_binding_count; + const DxbcShaderTranslator::TextureBinding* translator_texture_bindings = + translator.GetTextureBindings(texture_binding_count); uint32_t sampler_binding_count; const DxbcShaderTranslator::SamplerBinding* sampler_bindings = translator.GetSamplerBindings(sampler_binding_count); - shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, - sampler_bindings, sampler_binding_count); - - if (shader->is_valid()) { - const char* host_shader_type; - if (shader->type() == ShaderType::kVertex) { - switch (shader->host_vertex_shader_type()) { - case Shader::HostVertexShaderType::kLineDomainCPIndexed: - host_shader_type = "control-point-indexed line domain"; - break; - case Shader::HostVertexShaderType::kLineDomainPatchIndexed: - host_shader_type = "patch-indexed line domain"; - break; - case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: - host_shader_type = "control-point-indexed triangle domain"; - break; - case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: - host_shader_type = "patch-indexed triangle domain"; - break; - case Shader::HostVertexShaderType::kQuadDomainCPIndexed: - host_shader_type = "control-point-indexed quad domain"; - break; - case Shader::HostVertexShaderType::kQuadDomainPatchIndexed: - host_shader_type = "patch-indexed quad domain"; - break; - default: - host_shader_type = "vertex"; - } - } else { - host_shader_type = "pixel"; - } - XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n", - host_shader_type, shader->ucode_dword_count() * 4, - shader->ucode_data_hash(), shader->ucode_disassembly().c_str()); + shader->SetTexturesAndSamplers(translator_texture_bindings, + texture_binding_count, sampler_bindings, + sampler_binding_count); + assert_false(bindless_resources_used_ && + texture_binding_count + sampler_binding_count > + D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4); + // Get hashable texture bindings, without translator-specific info. 
+ const D3D12Shader::TextureBinding* texture_bindings = + shader->GetTextureBindings(texture_binding_count); + size_t texture_binding_layout_bytes = + texture_binding_count * sizeof(*texture_bindings); + uint64_t texture_binding_layout_hash = 0; + if (texture_binding_count) { + texture_binding_layout_hash = + XXH64(texture_bindings, texture_binding_layout_bytes, 0); } + uint32_t bindless_sampler_count = + bindless_resources_used_ ? sampler_binding_count : 0; + uint64_t bindless_sampler_layout_hash = 0; + if (bindless_sampler_count) { + XXH64_state_t hash_state; + XXH64_reset(&hash_state, 0); + for (uint32_t i = 0; i < bindless_sampler_count; ++i) { + XXH64_update(&hash_state, &sampler_bindings[i].bindless_descriptor_index, + sizeof(sampler_bindings[i].bindless_descriptor_index)); + } + bindless_sampler_layout_hash = XXH64_digest(&hash_state); + } + // Obtain the unique IDs of binding layouts if there are any texture bindings + // or bindless samplers, for invalidation in the command processor. + size_t texture_binding_layout_uid = kLayoutUIDEmpty; + // Use sampler count for the bindful case because it's the only thing that + // must be the same for layouts to be compatible in this case + // (instruction-specified parameters are used as overrides for actual + // samplers). + static_assert( + kLayoutUIDEmpty == 0, + "Empty layout UID is assumed to be 0 because for bindful samplers, the " + "UID is their count"); + size_t sampler_binding_layout_uid = bindless_resources_used_ + ? 
kLayoutUIDEmpty
+                                          : size_t(sampler_binding_count);
+  if (texture_binding_count || bindless_sampler_count) {
+    std::lock_guard layouts_lock(layouts_mutex_);
+    if (texture_binding_count) {
+      auto found_range =
+          texture_binding_layout_map_.equal_range(texture_binding_layout_hash);
+      for (auto it = found_range.first; it != found_range.second; ++it) {
+        if (it->second.vector_span_length == texture_binding_count &&
+            !std::memcmp(
+                texture_binding_layouts_.data() + it->second.vector_span_offset,
+                texture_bindings, texture_binding_layout_bytes)) {
+          texture_binding_layout_uid = it->second.uid;
+          break;
+        }
+      }
+      if (texture_binding_layout_uid == kLayoutUIDEmpty) {
+        static_assert(
+            kLayoutUIDEmpty == 0,
+            "Layout UID is size + 1 because it's assumed that 0 is the UID for "
+            "an empty layout");
+        texture_binding_layout_uid = texture_binding_layout_map_.size() + 1;
+        LayoutUID new_uid;
+        new_uid.uid = texture_binding_layout_uid;
+        new_uid.vector_span_offset = texture_binding_layouts_.size();
+        new_uid.vector_span_length = texture_binding_count;
+        texture_binding_layouts_.resize(new_uid.vector_span_offset +
+                                        texture_binding_count);
+        std::memcpy(
+            texture_binding_layouts_.data() + new_uid.vector_span_offset,
+            texture_bindings, texture_binding_layout_bytes);
+        texture_binding_layout_map_.insert(
+            {texture_binding_layout_hash, new_uid});
+      }
+    }
+    if (bindless_sampler_count) {
+      auto found_range = bindless_sampler_layout_map_.equal_range(
+          bindless_sampler_layout_hash);
+      for (auto it = found_range.first; it != found_range.second; ++it) {
+        if (it->second.vector_span_length != bindless_sampler_count) {
+          continue;
+        }
+        sampler_binding_layout_uid = it->second.uid;
+        const uint32_t* vector_bindless_sampler_layout =
+            bindless_sampler_layouts_.data() + it->second.vector_span_offset;
+        for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
+          if (vector_bindless_sampler_layout[i] !=
+              sampler_bindings[i].bindless_descriptor_index) {
+            sampler_binding_layout_uid = kLayoutUIDEmpty;
+            break;
+          }
+        }
+        if (sampler_binding_layout_uid != kLayoutUIDEmpty) {
+          break;
+        }
+      }
+      if (sampler_binding_layout_uid == kLayoutUIDEmpty) {
+        sampler_binding_layout_uid = bindless_sampler_layout_map_.size() + 1;
+        LayoutUID new_uid;
+        static_assert(
+            kLayoutUIDEmpty == 0,
+            "Layout UID is size + 1 because it's assumed that 0 is the UID for "
+            "an empty layout");
+        new_uid.uid = sampler_binding_layout_uid;
+        new_uid.vector_span_offset = bindless_sampler_layouts_.size();
+        new_uid.vector_span_length = sampler_binding_count;
+        bindless_sampler_layouts_.resize(new_uid.vector_span_offset +
+                                         sampler_binding_count);
+        uint32_t* vector_bindless_sampler_layout =
+            bindless_sampler_layouts_.data() + new_uid.vector_span_offset;
+        for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
+          vector_bindless_sampler_layout[i] =
+              sampler_bindings[i].bindless_descriptor_index;
+        }
+        bindless_sampler_layout_map_.insert(
+            {bindless_sampler_layout_hash, new_uid});
+      }
+    }
+  }
+  shader->SetTextureBindingLayoutUserUID(texture_binding_layout_uid);
+  shader->SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid);
 
   // Create a version of the shader with early depth/stencil forced by Xenia
   // itself when it's safe to do so or when EARLY_Z_ENABLE is set in
@@ -1856,7 +1984,7 @@ void PipelineCache::CreationThread(size_t thread_index) {
     // set the completion event if needed (at the next iteration, or in some
     // other thread).
{ - std::unique_lock lock(creation_request_lock_); + std::lock_guard lock(creation_request_lock_); --creation_threads_busy_; } } @@ -1867,7 +1995,7 @@ void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() { while (true) { PipelineState* pipeline_state_to_create; { - std::unique_lock lock(creation_request_lock_); + std::lock_guard lock(creation_request_lock_); if (creation_queue_.empty()) { break; } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 25fda0524..b73846d70 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -21,6 +21,7 @@ #include #include +#include "xenia/base/hash.h" #include "xenia/base/platform.h" #include "xenia/base/threading.h" #include "xenia/gpu/d3d12/d3d12_shader.h" @@ -37,9 +38,11 @@ class D3D12CommandProcessor; class PipelineCache { public: + static constexpr size_t kLayoutUIDEmpty = 0; + PipelineCache(D3D12CommandProcessor* command_processor, - RegisterFile* register_file, bool edram_rov_used, - uint32_t resolution_scale); + RegisterFile* register_file, bool bindless_resources_used, + bool edram_rov_used, uint32_t resolution_scale); ~PipelineCache(); bool Initialize(); @@ -217,6 +220,7 @@ class PipelineCache { PipelineDescription description; }; + // Can be called from multiple threads. bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl, Shader::HostVertexShaderType host_vertex_shader_type = @@ -233,13 +237,37 @@ class PipelineCache { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; + bool bindless_resources_used_; bool edram_rov_used_; uint32_t resolution_scale_; // Reusable shader translator. std::unique_ptr shader_translator_ = nullptr; // All loaded shaders mapped by their guest hash key. 
- std::unordered_map shader_map_; + std::unordered_map> + shader_map_; + + struct LayoutUID { + size_t uid; + size_t vector_span_offset; + size_t vector_span_length; + }; + std::mutex layouts_mutex_; + // Texture binding layouts of different shaders, for obtaining layout UIDs. + std::vector texture_binding_layouts_; + // Map of texture binding layouts used by shaders, for obtaining UIDs. Keys + // are XXH64 hashes of layouts, values need manual collision resolution using + // layout_vector_offset:layout_length of texture_binding_layouts_. + std::unordered_multimap> + texture_binding_layout_map_; + // Bindless sampler indices of different shaders, for obtaining layout UIDs. + // For bindful, sampler count is used as the UID instead. + std::vector bindless_sampler_layouts_; + // Keys are XXH64 hashes of used bindless sampler indices. + std::unordered_multimap> + bindless_sampler_layout_map_; // Empty depth-only pixel shader for writing to depth buffer via ROV when no // Xenos pixel shader provided. @@ -252,7 +280,9 @@ class PipelineCache { }; // All previously generated pipeline state objects identified by hash and the // description. - std::unordered_multimap pipeline_states_; + std::unordered_multimap> + pipeline_states_; // Previously used pipeline state object. 
This matches our current state // settings and allows us to quickly(ish) reuse the pipeline state if no diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index b6d172c90..155faab69 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -102,10 +102,12 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file, TraceWriter* trace_writer, + bool bindless_resources_used, bool edram_rov_used) : command_processor_(command_processor), register_file_(register_file), trace_writer_(trace_writer), + bindless_resources_used_(bindless_resources_used), edram_rov_used_(edram_rov_used) {} RenderTargetCache::~RenderTargetCache() { Shutdown(); } @@ -181,10 +183,10 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) { edram_buffer_, nullptr, &edram_buffer_uint32_uav_desc, provider->OffsetViewDescriptor( edram_buffer_descriptor_heap_start_, - uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV))); + uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV))); // Create the root signature for EDRAM buffer load/store. - D3D12_ROOT_PARAMETER load_store_root_parameters[2]; + D3D12_ROOT_PARAMETER load_store_root_parameters[3]; // Parameter 0 is constants (changed for each render target binding). load_store_root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; @@ -193,24 +195,32 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) { load_store_root_parameters[0].Constants.Num32BitValues = sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t); load_store_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // Parameter 1 is source and target. 
- D3D12_DESCRIPTOR_RANGE load_store_root_ranges[2]; - load_store_root_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - load_store_root_ranges[0].NumDescriptors = 1; - load_store_root_ranges[0].BaseShaderRegister = 0; - load_store_root_ranges[0].RegisterSpace = 0; - load_store_root_ranges[0].OffsetInDescriptorsFromTableStart = 0; - load_store_root_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - load_store_root_ranges[1].NumDescriptors = 1; - load_store_root_ranges[1].BaseShaderRegister = 0; - load_store_root_ranges[1].RegisterSpace = 0; - load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 1; + // Parameter 1 is the destination. + D3D12_DESCRIPTOR_RANGE load_store_root_dest_range; + load_store_root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + load_store_root_dest_range.NumDescriptors = 1; + load_store_root_dest_range.BaseShaderRegister = 0; + load_store_root_dest_range.RegisterSpace = 0; + load_store_root_dest_range.OffsetInDescriptorsFromTableStart = 0; load_store_root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 2; + load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1; load_store_root_parameters[1].DescriptorTable.pDescriptorRanges = - load_store_root_ranges; + &load_store_root_dest_range; load_store_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + // Parameter 2 is the source. 
+ D3D12_DESCRIPTOR_RANGE load_store_root_source_range; + load_store_root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + load_store_root_source_range.NumDescriptors = 1; + load_store_root_source_range.BaseShaderRegister = 0; + load_store_root_source_range.RegisterSpace = 0; + load_store_root_source_range.OffsetInDescriptorsFromTableStart = 0; + load_store_root_parameters[2].ParameterType = + D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + load_store_root_parameters[2].DescriptorTable.NumDescriptorRanges = 1; + load_store_root_parameters[2].DescriptorTable.pDescriptorRanges = + &load_store_root_source_range; + load_store_root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; D3D12_ROOT_SIGNATURE_DESC load_store_root_desc; load_store_root_desc.NumParameters = UINT(xe::countof(load_store_root_parameters)); @@ -226,10 +236,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) { Shutdown(); return false; } - // Create the clear root signature (the same, but with the UAV only). - load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 0; - load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1; - ++load_store_root_parameters[1].DescriptorTable.pDescriptorRanges; + // Create the clear root signature (the same, but with the destination only). + load_store_root_desc.NumParameters = 2; edram_clear_root_signature_ = ui::d3d12::util::CreateRootSignature(provider, load_store_root_desc); if (edram_clear_root_signature_ == nullptr) { @@ -1359,8 +1367,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // bilinear filtering), applying exponent bias and swapping red and blue in // a format-agnostic way, then the resulting color is written to a temporary // RTV of the destination format. 
- auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); uint32_t resolution_scale_log2 = resolution_scale_2x_ ? 1 : 0; // Check if we need to apply the hack to remove the gap on the left and top // sides of the screen caused by half-pixel offset becoming whole pixel offset @@ -1423,33 +1431,50 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, } // Write the source and destination descriptors. - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - return false; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source; + if (bindless_resources_used_) { + if (resolution_scale_2x_) { + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 1, &descriptor_dest)) { + return false; + } + } else { + descriptor_dest = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kSharedMemoryRawUAV); + } + descriptor_source = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV); + } else { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 2, descriptors)) { + return false; + } + descriptor_dest = descriptors[0]; + if (!resolution_scale_2x_) { + shared_memory->WriteRawUAVDescriptor(descriptor_dest.first); + } + descriptor_source = descriptors[1]; + WriteEDRAMRawSRVDescriptor(descriptor_source.first); } TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - 
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start); if (resolution_scale_2x_) { texture_cache->UseScaledResolveBufferForWriting(); // Can't address more than 512 MB directly on Nvidia - binding only a part // of the buffer. texture_cache->CreateScaledResolveBufferRawUAV( - provider->OffsetViewDescriptor(descriptor_cpu_start, 1), - dest_address >> 12, + descriptor_dest.first, dest_address >> 12, ((dest_address + dest_size - 1) >> 12) - (dest_address >> 12) + 1); } else { shared_memory->UseForWriting(); - shared_memory->WriteRawUAVDescriptor( - provider->OffsetViewDescriptor(descriptor_cpu_start, 1)); + // Descriptor already written. } - command_processor_->SubmitBarriers(); // Dispatch the computation. command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_); + command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second); + command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second); EDRAMLoadStoreRootConstants root_constants; // Address is adjusted to the first modified tile, so using & 31 as the // destination offset. @@ -1488,10 +1513,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, } command_list->D3DSetComputeRoot32BitConstants( 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_processor_->SetComputePipeline( src_64bpp ? edram_tile_sample_64bpp_pipeline_ : edram_tile_sample_32bpp_pipeline_); + command_processor_->SubmitBarriers(); // 1 group per destination 80x16 region. uint32_t group_count_x = row_width_ss_div_80, group_count_y = rows; if (msaa_samples >= MsaaSamples::k2X) { @@ -1572,14 +1598,30 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, if (resolve_target == nullptr) { return false; } - // Descriptors. 2 for EDRAM load, 1 for conversion. 
- D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 3, 3, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - return false; + // Descriptors. + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_copy_buffer; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_rt; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram; + if (bindless_resources_used_) { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 2, descriptors)) { + return false; + } + descriptor_copy_buffer = descriptors[0]; + descriptor_rt = descriptors[1]; + descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV); + } else { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[3]; + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 3, descriptors)) { + return false; + } + descriptor_copy_buffer = descriptors[0]; + descriptor_rt = descriptors[1]; + descriptor_edram = descriptors[2]; + WriteEDRAMRawSRVDescriptor(descriptor_edram.first); } // Buffer for copying. 
D3D12_RESOURCE_STATES copy_buffer_state = @@ -1616,11 +1658,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, 0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants, 0); - WriteEDRAMRawSRVDescriptor(descriptor_cpu_start); - ui::d3d12::util::CreateRawBufferUAV( - device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1), - copy_buffer, render_target->copy_buffer_size); - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second); + ui::d3d12::util::CreateRawBufferUAV(device, descriptor_copy_buffer.first, + copy_buffer, + render_target->copy_buffer_size); + command_list->D3DSetComputeRootDescriptorTable( + 1, descriptor_copy_buffer.second); EDRAMLoadStoreMode mode = GetLoadStoreMode(false, src_format); command_processor_->SetComputePipeline( @@ -1630,13 +1673,6 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, command_list->D3DDispatch(row_width_ss_div_80, rows, 1); command_processor_->PushUAVBarrier(copy_buffer); - // Go to the next descriptor set. - - descriptor_cpu_start = - provider->OffsetViewDescriptor(descriptor_cpu_start, 2); - descriptor_gpu_start = - provider->OffsetViewDescriptor(descriptor_gpu_start, 2); - // Copy the EDRAM buffer contents to the source texture. 
#if 0 @@ -1770,8 +1806,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, rt_srv_desc.Texture2D.PlaneSlice = 0; rt_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView(render_target->resource, &rt_srv_desc, - descriptor_cpu_start); - command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_gpu_start); + descriptor_rt.first); + command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_rt.second); command_processor_->SubmitBarriers(); command_processor_->SetSamplePositions(MsaaSamples::k1X); @@ -1878,17 +1914,17 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0; uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0; - // Get everything needed for clearing. - auto command_list = command_processor_->GetDeferredCommandList(); - auto device = - command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - return false; + // Get transient data needed for clearing. + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram; + if (bindless_resources_used_) { + descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV); + } else { + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 1, &descriptor_edram)) { + return false; + } + WriteEDRAMRawUAVDescriptor(descriptor_edram.first); } // Submit the clear. 
@@ -1935,11 +1971,11 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, root_constants.clear_color_high = regs[reg].u32; command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_); } + auto command_list = command_processor_->GetDeferredCommandList(); command_list->D3DSetComputeRootSignature(edram_clear_root_signature_); command_list->D3DSetComputeRoot32BitConstants( 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); - WriteEDRAMRawUAVDescriptor(descriptor_cpu_start); - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second); // 1 group per 80x16 samples. Resolution scale handled in the shader itself. command_list->D3DDispatch(row_width_ss_div_80, rows, 1); CommitEDRAMBufferUAVWrites(true); @@ -2150,7 +2186,7 @@ void RenderTargetCache::FlushAndUnbindRenderTargets() { ClearBindings(); } -void RenderTargetCache::WriteEDRAMUint32UAVDescriptor( +void RenderTargetCache::WriteEDRAMR32UintUAVDescriptor( D3D12_CPU_DESCRIPTOR_HANDLE handle) { auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); @@ -2158,7 +2194,31 @@ void RenderTargetCache::WriteEDRAMUint32UAVDescriptor( 1, handle, provider->OffsetViewDescriptor( edram_buffer_descriptor_heap_start_, - uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)), + uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); +} + +void RenderTargetCache::WriteEDRAMRawSRVDescriptor( + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + device->CopyDescriptorsSimple( + 1, handle, + provider->OffsetViewDescriptor( + edram_buffer_descriptor_heap_start_, + uint32_t(EDRAMBufferDescriptorIndex::kRawSRV)), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); +} + +void RenderTargetCache::WriteEDRAMRawUAVDescriptor( + 
D3D12_CPU_DESCRIPTOR_HANDLE handle) { + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + device->CopyDescriptorsSimple( + 1, handle, + provider->OffsetViewDescriptor( + edram_buffer_descriptor_heap_start_, + uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } @@ -2283,13 +2343,22 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) { // Clear and ignore the old 32-bit float depth - the non-ROV path is // inaccurate anyway, and this is backend-specific, not a part of a guest // trace. - D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu; - D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1, - shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) != - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu); + bool edram_shader_visible_r32_uav_obtained; + ui::d3d12::util::DescriptorCPUGPUHandlePair edram_shader_visible_r32_uav; + if (bindless_resources_used_) { + edram_shader_visible_r32_uav_obtained = true; + edram_shader_visible_r32_uav = + command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMR32UintUAV); + } else { + edram_shader_visible_r32_uav_obtained = + command_processor_->RequestOneUseSingleViewDescriptors( + 1, &edram_shader_visible_r32_uav); + if (edram_shader_visible_r32_uav_obtained) { + WriteEDRAMR32UintUAVDescriptor(edram_shader_visible_r32_uav.first); + } + } + if (edram_shader_visible_r32_uav_obtained) { UINT clear_value[4] = {0, 0, 0, 0}; D3D12_RECT clear_rect; clear_rect.left = kEDRAMSize >> 2; @@ -2301,13 +2370,11 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) { // ClearUnorderedAccessView takes a shader-visible GPU descriptor and a // non-shader-visible CPU descriptor. 
command_list->D3DClearUnorderedAccessViewUint( - shader_visbile_descriptor_gpu, + edram_shader_visible_r32_uav.second, provider->OffsetViewDescriptor( edram_buffer_descriptor_heap_start_, - uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)), + uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)), edram_buffer_, clear_value, 1, &clear_rect); - } else { - XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth"); } } } @@ -2343,30 +2410,6 @@ void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) { edram_buffer_modified_ = false; } -void RenderTargetCache::WriteEDRAMRawSRVDescriptor( - D3D12_CPU_DESCRIPTOR_HANDLE handle) { - auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); - device->CopyDescriptorsSimple( - 1, handle, - provider->OffsetViewDescriptor( - edram_buffer_descriptor_heap_start_, - uint32_t(EDRAMBufferDescriptorIndex::kRawSRV)), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); -} - -void RenderTargetCache::WriteEDRAMRawUAVDescriptor( - D3D12_CPU_DESCRIPTOR_HANDLE handle) { - auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); - device->CopyDescriptorsSimple( - 1, handle, - provider->OffsetViewDescriptor( - edram_buffer_descriptor_heap_start_, - uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); -} - void RenderTargetCache::ClearBindings() { current_surface_pitch_ = 0; current_msaa_samples_ = MsaaSamples::k1X; @@ -2710,13 +2753,24 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { } // Allocate descriptors for the buffers. 
- D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - return; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source; + if (bindless_resources_used_) { + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 1, &descriptor_source)) { + return; + } + descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV); + } else { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + if (!command_processor_->RequestOneUseSingleViewDescriptors(2, + descriptors)) { + return; + } + descriptor_edram = descriptors[0]; + WriteEDRAMRawUAVDescriptor(descriptor_edram.first); + descriptor_source = descriptors[1]; } // Get the buffer for copying. @@ -2740,14 +2794,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); // Set up the bindings. 
- auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_); - ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, copy_buffer, - copy_buffer_size); - WriteEDRAMRawUAVDescriptor( - provider->OffsetViewDescriptor(descriptor_cpu_start, 1)); - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + ui::d3d12::util::CreateRawBufferSRV(device, descriptor_source.first, + copy_buffer, copy_buffer_size); + command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second); + command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second); // Sort the bindings in ascending order of EDRAM base so data in the render // targets placed farther in EDRAM isn't lost in case of overlap. @@ -2857,13 +2910,23 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( auto command_list = command_processor_->GetDeferredCommandList(); // Allocate descriptors for the buffers. 
- D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - return; + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest, descriptor_edram; + if (bindless_resources_used_) { + if (!command_processor_->RequestOneUseSingleViewDescriptors( + 1, &descriptor_dest)) { + return; + } + descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair( + D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV); + } else { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + if (!command_processor_->RequestOneUseSingleViewDescriptors(2, + descriptors)) { + return; + } + descriptor_dest = descriptors[0]; + descriptor_edram = descriptors[1]; + WriteEDRAMRawSRVDescriptor(descriptor_edram.first); } // Get the buffer for copying. @@ -2892,14 +2955,13 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); // Set up the bindings. 
- auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_); - WriteEDRAMRawSRVDescriptor(descriptor_cpu_start); - ui::d3d12::util::CreateRawBufferUAV( - device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1), - copy_buffer, copy_buffer_size); - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second); + ui::d3d12::util::CreateRawBufferUAV(device, descriptor_dest.first, + copy_buffer, copy_buffer_size); + command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second); // Load each render target. for (uint32_t i = 0; i < render_target_count; ++i) { diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 888938eb7..0c7b54bf8 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -251,7 +251,7 @@ class RenderTargetCache { RenderTargetCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file, TraceWriter* trace_writer, - bool edram_rov_used); + bool bindless_resources_used, bool edram_rov_used); ~RenderTargetCache(); bool Initialize(const TextureCache* texture_cache); @@ -284,7 +284,9 @@ class RenderTargetCache { // the command processor takes over framebuffer bindings to draw something // special. May change the CBV/SRV/UAV descriptor heap. 
void FlushAndUnbindRenderTargets(); - void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); + void WriteEDRAMR32UintUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); + void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); + void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); // Totally necessary to rely on the base format - Too Human switches between // 2_10_10_10_FLOAT and 2_10_10_10_FLOAT_AS_16_16_16_16 every draw. @@ -436,9 +438,6 @@ class RenderTargetCache { void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state); void CommitEDRAMBufferUAVWrites(bool force); - void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); - void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); - void ClearBindings(); #if 0 @@ -518,6 +517,7 @@ class RenderTargetCache { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; TraceWriter* trace_writer_; + bool bindless_resources_used_; bool edram_rov_used_; // Whether 1 guest pixel is rendered as 2x2 host pixels (currently only @@ -538,7 +538,7 @@ class RenderTargetCache { kRawSRV, kRawUAV, // For ROV access primarily. 
- kUint32UAV, + kR32UintUAV, kCount, }; diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli b/src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli index 8c9cfef1f..cef1d9315 100644 --- a/src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli +++ b/src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli @@ -47,10 +47,10 @@ cbuffer XeEDRAMLoadStoreConstants : register(b0) { #define xe_edram_clear_depth24 (xe_edram_load_store_constants.z) #define xe_edram_clear_depth32 (xe_edram_load_store_constants.w) +RWByteAddressBuffer xe_edram_load_store_dest : register(u0); #ifndef XE_EDRAM_WRITE_ONLY ByteAddressBuffer xe_edram_load_store_source : register(t0); #endif -RWByteAddressBuffer xe_edram_load_store_dest : register(u0); uint2 XeEDRAMSampleCountLog2() { return (xe_edram_base_samples_2x_depth_pitch >> uint2(12u, 11u)) & 1u; diff --git a/src/xenia/gpu/d3d12/shaders/texture_load.hlsli b/src/xenia/gpu/d3d12/shaders/texture_load.hlsli index 6abf95f61..f15a810ff 100644 --- a/src/xenia/gpu/d3d12/shaders/texture_load.hlsli +++ b/src/xenia/gpu/d3d12/shaders/texture_load.hlsli @@ -27,8 +27,8 @@ cbuffer XeTextureLoadConstants : register(b0) { #define XeTextureLoadGuestPitchTiled 0xFFFFFFFFu -ByteAddressBuffer xe_texture_load_source : register(t0); RWByteAddressBuffer xe_texture_load_dest : register(u0); +ByteAddressBuffer xe_texture_load_source : register(t0); // bpb and bpb_log2 are separate because bpb may be not a power of 2 (like 96). uint4 XeTextureLoadGuestBlockOffsets(uint3 block_index, uint bpb, diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index cb6c04bad..a5d016d28 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -494,7 +494,8 @@ bool SharedMemory::AreTiledResourcesUsed() const { auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); // As of October 8th, 2018, PIX doesn't support tiled buffers. // FIXME(Triang3l): Re-enable tiled resources with PIX once fixed. 
- return provider->GetTiledResourcesTier() >= 1 && + return provider->GetTiledResourcesTier() != + D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED && provider->GetGraphicsAnalysis() == nullptr; } diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 2c09f0b56..13d7ed6b1 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -12,6 +12,7 @@ #include "third_party/xxhash/xxhash.h" #include +#include #include #include "xenia/base/assert.h" @@ -92,7 +93,6 @@ namespace d3d12 { #include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r10g11b11_rgba16_cs.h" #include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r11g11b10_rgba16_cs.h" -constexpr uint32_t TextureCache::Texture::kCachedSRVDescriptorSwizzleMissing; constexpr uint32_t TextureCache::SRVDescriptorCachePage::kHeapSize; constexpr uint32_t TextureCache::LoadConstants::kGuestPitchTiled; constexpr uint32_t TextureCache::kScaledResolveBufferSizeLog2; @@ -905,9 +905,11 @@ const TextureCache::ResolveTileModeInfo TextureCache::TextureCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file, + bool bindless_resources_used, SharedMemory* shared_memory) : command_processor_(command_processor), register_file_(register_file), + bindless_resources_used_(bindless_resources_used), shared_memory_(shared_memory) {} TextureCache::~TextureCache() { Shutdown(); } @@ -920,7 +922,8 @@ bool TextureCache::Initialize(bool edram_rov_used) { // Not currently supported with the RTV/DSV output path for various reasons. // As of November 27th, 2018, PIX doesn't support tiled buffers. 
if (cvars::d3d12_resolution_scale >= 2 && edram_rov_used && - provider->GetTiledResourcesTier() >= 1 && + provider->GetTiledResourcesTier() != + D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED && provider->GetGraphicsAnalysis() == nullptr && provider->GetVirtualAddressBitsPerResource() >= kScaledResolveBufferSizeLog2) { @@ -947,28 +950,34 @@ bool TextureCache::Initialize(bool edram_rov_used) { scaled_resolve_heap_count_ = 0; // Create the loading root signature. - D3D12_ROOT_PARAMETER root_parameters[2]; - // Parameter 0 is constants (changed very often when untiling). + D3D12_ROOT_PARAMETER root_parameters[3]; + // Parameter 0 is constants (changed multiple times when untiling). root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; root_parameters[0].Descriptor.ShaderRegister = 0; root_parameters[0].Descriptor.RegisterSpace = 0; root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // Parameter 1 is source and target. - D3D12_DESCRIPTOR_RANGE root_copy_ranges[2]; - root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - root_copy_ranges[0].NumDescriptors = 1; - root_copy_ranges[0].BaseShaderRegister = 0; - root_copy_ranges[0].RegisterSpace = 0; - root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0; - root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - root_copy_ranges[1].NumDescriptors = 1; - root_copy_ranges[1].BaseShaderRegister = 0; - root_copy_ranges[1].RegisterSpace = 0; - root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1; + // Parameter 1 is the destination. 
+ D3D12_DESCRIPTOR_RANGE root_dest_range; + root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + root_dest_range.NumDescriptors = 1; + root_dest_range.BaseShaderRegister = 0; + root_dest_range.RegisterSpace = 0; + root_dest_range.OffsetInDescriptorsFromTableStart = 0; root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - root_parameters[1].DescriptorTable.NumDescriptorRanges = 2; - root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges; + root_parameters[1].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[1].DescriptorTable.pDescriptorRanges = &root_dest_range; root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + // Parameter 2 is the source. + D3D12_DESCRIPTOR_RANGE root_source_range; + root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + root_source_range.NumDescriptors = 1; + root_source_range.BaseShaderRegister = 0; + root_source_range.RegisterSpace = 0; + root_source_range.OffsetInDescriptorsFromTableStart = 0; + root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameters[2].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[2].DescriptorTable.pDescriptorRanges = &root_source_range; + root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; D3D12_ROOT_SIGNATURE_DESC root_signature_desc; root_signature_desc.NumParameters = UINT(xe::countof(root_parameters)); root_signature_desc.pParameters = root_parameters; @@ -1033,6 +1042,8 @@ bool TextureCache::Initialize(bool edram_rov_used) { } } + srv_descriptor_cache_allocated_ = 0; + // Create a heap with null SRV descriptors, since it's faster to copy a // descriptor than to create an SRV, and null descriptors are used a lot (for // the signed version when only unsigned is used, for instance). 
@@ -1137,6 +1148,14 @@ void TextureCache::ClearCache() { Texture* texture = texture_pair.second; shared_memory_->UnwatchMemoryRange(texture->base_watch_handle); shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle); + // Bindful descriptor cache will be cleared entirely now, so only release + // bindless descriptors. + if (bindless_resources_used_) { + for (auto descriptor_pair : texture->srv_descriptors) { + command_processor_->ReleaseViewBindlessDescriptorImmediately( + descriptor_pair.second); + } + } texture->resource->Release(); delete texture; } @@ -1148,6 +1167,7 @@ void TextureCache::ClearCache() { // Clear texture descriptor cache. srv_descriptor_cache_free_.clear(); + srv_descriptor_cache_allocated_ = 0; for (auto& page : srv_descriptor_cache_) { page.heap->Release(); } @@ -1155,7 +1175,7 @@ void TextureCache::ClearCache() { } void TextureCache::TextureFetchConstantWritten(uint32_t index) { - texture_keys_in_sync_ &= ~(1u << index); + texture_bindings_in_sync_ &= ~(1u << index); } void TextureCache::BeginFrame() { @@ -1214,12 +1234,18 @@ void TextureCache::BeginFrame() { // Exclude the texture from the memory usage counter. textures_total_size_ -= texture->resource_size; // Destroy the texture. 
- if (texture->cached_srv_descriptor_swizzle != - Texture::kCachedSRVDescriptorSwizzleMissing) { - srv_descriptor_cache_free_.push_back(texture->cached_srv_descriptor); - } shared_memory_->UnwatchMemoryRange(texture->base_watch_handle); shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle); + if (bindless_resources_used_) { + for (auto descriptor_pair : texture->srv_descriptors) { + command_processor_->ReleaseViewBindlessDescriptorImmediately( + descriptor_pair.second); + } + } else { + for (auto descriptor_pair : texture->srv_descriptors) { + srv_descriptor_cache_free_.push_back(descriptor_pair.second); + } + } texture->resource->Release(); delete texture; } @@ -1262,8 +1288,10 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { // loading may be needed in some draw call later, which may have the same // key for some binding as before the invalidation, but texture_invalidated_ // being false (menu background in Halo 3). - std::memset(texture_bindings_, 0, sizeof(texture_bindings_)); - texture_keys_in_sync_ = 0; + for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) { + texture_bindings_[i].Clear(); + } + texture_bindings_in_sync_ = 0; } // Update the texture keys and the textures. 
@@ -1272,7 +1300,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { while (xe::bit_scan_forward(textures_remaining, &index)) { uint32_t index_bit = uint32_t(1) << index; textures_remaining &= ~index_bit; - if (texture_keys_in_sync_ & index_bit) { + if (texture_bindings_in_sync_ & index_bit) { continue; } TextureBinding& binding = texture_bindings_[index]; @@ -1282,10 +1310,12 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { uint8_t old_swizzled_signs = binding.swizzled_signs; BindingInfoFromFetchConstant(fetch, binding.key, &binding.host_swizzle, &binding.swizzled_signs); - texture_keys_in_sync_ |= index_bit; + texture_bindings_in_sync_ |= index_bit; if (binding.key.IsInvalid()) { binding.texture = nullptr; binding.texture_signed = nullptr; + binding.descriptor_index = UINT32_MAX; + binding.descriptor_index_signed = UINT32_MAX; continue; } @@ -1305,27 +1335,64 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { if (key_changed || !texture_util::IsAnySignNotSigned(old_swizzled_signs)) { binding.texture = FindOrCreateTexture(binding.key); + binding.descriptor_index = + binding.texture + ? FindOrCreateTextureDescriptor(*binding.texture, false, + binding.host_swizzle) + : UINT32_MAX; load_unsigned_data = true; } } else { binding.texture = nullptr; + binding.descriptor_index = UINT32_MAX; } if (texture_util::IsAnySignSigned(binding.swizzled_signs)) { if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) { TextureKey signed_key = binding.key; signed_key.signed_separate = 1; binding.texture_signed = FindOrCreateTexture(signed_key); + binding.descriptor_index_signed = + binding.texture_signed + ? 
FindOrCreateTextureDescriptor(*binding.texture_signed, true, + binding.host_swizzle) + : UINT32_MAX; load_signed_data = true; } } else { binding.texture_signed = nullptr; + binding.descriptor_index_signed = UINT32_MAX; } } else { + // Same resource for both unsigned and signed, but descriptor formats may + // be different. if (key_changed) { binding.texture = FindOrCreateTexture(binding.key); load_unsigned_data = true; } binding.texture_signed = nullptr; + if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) { + if (key_changed || + !texture_util::IsAnySignNotSigned(old_swizzled_signs)) { + binding.descriptor_index = + binding.texture + ? FindOrCreateTextureDescriptor(*binding.texture, false, + binding.host_swizzle) + : UINT32_MAX; + } + } else { + binding.descriptor_index = UINT32_MAX; + } + if (texture_util::IsAnySignSigned(binding.swizzled_signs)) { + if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) { + binding.descriptor_index_signed = + binding.texture + ? FindOrCreateTextureDescriptor(*binding.texture, true, + binding.host_swizzle) + : UINT32_MAX; + } + } else { + binding.descriptor_index_signed = UINT32_MAX; + } } if (load_unsigned_data && binding.texture != nullptr) { LoadTextureData(binding.texture); @@ -1368,208 +1435,132 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) { } } -uint64_t TextureCache::GetDescriptorHashForActiveTextures( - const D3D12Shader::TextureSRV* texture_srvs, - uint32_t texture_srv_count) const { - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - for (uint32_t i = 0; i < texture_srv_count; ++i) { - const D3D12Shader::TextureSRV& texture_srv = texture_srvs[i]; - // There can be multiple SRVs of the same texture. 
- XXH64_update(&hash_state, &texture_srv.dimension, - sizeof(texture_srv.dimension)); - XXH64_update(&hash_state, &texture_srv.is_signed, - sizeof(texture_srv.is_signed)); +bool TextureCache::AreActiveTextureSRVKeysUpToDate( + const TextureSRVKey* keys, + const D3D12Shader::TextureBinding* host_shader_bindings, + uint32_t host_shader_binding_count) const { + for (uint32_t i = 0; i < host_shader_binding_count; ++i) { + const TextureSRVKey& key = keys[i]; const TextureBinding& binding = - texture_bindings_[texture_srv.fetch_constant]; - XXH64_update(&hash_state, &binding.key, sizeof(binding.key)); - XXH64_update(&hash_state, &binding.host_swizzle, - sizeof(binding.host_swizzle)); - XXH64_update(&hash_state, &binding.swizzled_signs, - sizeof(binding.swizzled_signs)); + texture_bindings_[host_shader_bindings[i].fetch_constant]; + if (key.key != binding.key || key.host_swizzle != binding.host_swizzle || + key.swizzled_signs != binding.swizzled_signs) { + return false; + } } - return XXH64_digest(&hash_state); + return true; } -void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv, - D3D12_CPU_DESCRIPTOR_HANDLE handle) { - D3D12_SHADER_RESOURCE_VIEW_DESC desc; - desc.Format = DXGI_FORMAT_UNKNOWN; - Dimension binding_dimension; - uint32_t mip_max_level, array_size; +void TextureCache::WriteActiveTextureSRVKeys( + TextureSRVKey* keys, + const D3D12Shader::TextureBinding* host_shader_bindings, + uint32_t host_shader_binding_count) const { + for (uint32_t i = 0; i < host_shader_binding_count; ++i) { + TextureSRVKey& key = keys[i]; + const TextureBinding& binding = + texture_bindings_[host_shader_bindings[i].fetch_constant]; + key.key = binding.key; + key.host_swizzle = binding.host_swizzle; + key.swizzled_signs = binding.swizzled_signs; + } +} + +void TextureCache::WriteActiveTextureBindfulSRV( + const D3D12Shader::TextureBinding& host_shader_binding, + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + assert_false(bindless_resources_used_); + const 
TextureBinding& binding = + texture_bindings_[host_shader_binding.fetch_constant]; + uint32_t descriptor_index = UINT32_MAX; Texture* texture = nullptr; - ID3D12Resource* resource = nullptr; - - const TextureBinding& binding = texture_bindings_[texture_srv.fetch_constant]; - if (!binding.key.IsInvalid()) { - TextureFormat format = binding.key.format; - - if (IsSignedVersionSeparate(format) && texture_srv.is_signed) { - texture = binding.texture_signed; - } else { - texture = binding.texture; - } - if (texture != nullptr) { - resource = texture->resource; - } - - if (texture_srv.is_signed) { + if (!binding.key.IsInvalid() && + AreDimensionsCompatible(host_shader_binding.dimension, + binding.key.dimension)) { + if (host_shader_binding.is_signed) { // Not supporting signed compressed textures - hopefully DXN and DXT5A are // not used as signed. if (texture_util::IsAnySignSigned(binding.swizzled_signs)) { - desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm; - if (desc.Format == DXGI_FORMAT_UNKNOWN) { - unsupported_format_features_used_[uint32_t(format)] |= - kUnsupportedSnormBit; - } + descriptor_index = binding.descriptor_index_signed; + texture = IsSignedVersionSeparate(binding.key.format) + ? binding.texture_signed + : binding.texture; } } else { if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) { - desc.Format = GetDXGIUnormFormat(binding.key); - if (desc.Format == DXGI_FORMAT_UNKNOWN) { - unsupported_format_features_used_[uint32_t(format)] |= - kUnsupportedUnormBit; - } + descriptor_index = binding.descriptor_index; + texture = binding.texture; } } - - binding_dimension = binding.key.dimension; - mip_max_level = binding.key.mip_max_level; - array_size = binding.key.depth; - // XE_GPU_SWIZZLE and D3D12_SHADER_COMPONENT_MAPPING are the same except for - // one bit. 
- desc.Shader4ComponentMapping = - binding.host_swizzle | - D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES; - } else { - binding_dimension = Dimension::k2D; - mip_max_level = 0; - array_size = 1; - desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, - D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); } - - if (desc.Format == DXGI_FORMAT_UNKNOWN) { - // A null descriptor must still have a valid format. - desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - resource = nullptr; - } - NullSRVDescriptorIndex null_descriptor_index; - switch (texture_srv.dimension) { - case TextureDimension::k3D: - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - desc.Texture3D.MostDetailedMip = 0; - desc.Texture3D.MipLevels = mip_max_level + 1; - desc.Texture3D.ResourceMinLODClamp = 0.0f; - if (binding_dimension != Dimension::k3D) { - // Create a null descriptor so it's safe to sample this texture even - // though it has different dimensions. 
- resource = nullptr; - } - null_descriptor_index = NullSRVDescriptorIndex::k3D; - break; - case TextureDimension::kCube: - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; - desc.TextureCube.MostDetailedMip = 0; - desc.TextureCube.MipLevels = mip_max_level + 1; - desc.TextureCube.ResourceMinLODClamp = 0.0f; - if (binding_dimension != Dimension::kCube) { - resource = nullptr; - } - null_descriptor_index = NullSRVDescriptorIndex::kCube; - break; - default: - desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - desc.Texture2DArray.MostDetailedMip = 0; - desc.Texture2DArray.MipLevels = mip_max_level + 1; - desc.Texture2DArray.FirstArraySlice = 0; - desc.Texture2DArray.ArraySize = array_size; - desc.Texture2DArray.PlaneSlice = 0; - desc.Texture2DArray.ResourceMinLODClamp = 0.0f; - if (binding_dimension == Dimension::k3D || - binding_dimension == Dimension::kCube) { - resource = nullptr; - } - null_descriptor_index = NullSRVDescriptorIndex::k2DArray; - break; - } - auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + D3D12_CPU_DESCRIPTOR_HANDLE source_handle; + if (descriptor_index != UINT32_MAX) { + assert_not_null(texture); + MarkTextureUsed(texture); + source_handle = GetTextureDescriptorCPUHandle(descriptor_index); + } else { + NullSRVDescriptorIndex null_descriptor_index; + switch (host_shader_binding.dimension) { + case TextureDimension::k3D: + null_descriptor_index = NullSRVDescriptorIndex::k3D; + break; + case TextureDimension::kCube: + null_descriptor_index = NullSRVDescriptorIndex::kCube; + break; + default: + assert_true(host_shader_binding.dimension == TextureDimension::k1D || + host_shader_binding.dimension == TextureDimension::k2D); + null_descriptor_index = NullSRVDescriptorIndex::k2DArray; + } + source_handle = provider->OffsetViewDescriptor( + null_srv_descriptor_heap_start_, uint32_t(null_descriptor_index)); + } auto device = provider->GetDevice(); - if (resource == nullptr) { - // Copy a pre-made null descriptor 
since it's faster than to create an SRV. - device->CopyDescriptorsSimple( - 1, handle, - provider->OffsetViewDescriptor(null_srv_descriptor_heap_start_, - uint32_t(null_descriptor_index)), - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - return; - } - MarkTextureUsed(texture); - // Take the descriptor from the cache if it's cached, or create a new one in - // the cache, or directly if this texture was already used with a different - // swizzle. Profiling results say that CreateShaderResourceView takes the - // longest time of draw call processing, and it's very noticeable in many - // games. - bool cached_handle_available = false; - D3D12_CPU_DESCRIPTOR_HANDLE cached_handle = {}; - assert_not_null(texture); - if (texture->cached_srv_descriptor_swizzle != - Texture::kCachedSRVDescriptorSwizzleMissing) { - // Use an existing cached descriptor if it has the needed swizzle. - if (binding.host_swizzle == texture->cached_srv_descriptor_swizzle) { - cached_handle_available = true; - cached_handle = texture->cached_srv_descriptor; - } - } else { - // Try to create a new cached descriptor if it doesn't exist yet. 
- if (!srv_descriptor_cache_free_.empty()) { - cached_handle_available = true; - cached_handle = srv_descriptor_cache_free_.back(); - srv_descriptor_cache_free_.pop_back(); - } else if (srv_descriptor_cache_.empty() || - srv_descriptor_cache_.back().current_usage >= - SRVDescriptorCachePage::kHeapSize) { - D3D12_DESCRIPTOR_HEAP_DESC new_heap_desc; - new_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - new_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize; - new_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - new_heap_desc.NodeMask = 0; - ID3D12DescriptorHeap* new_heap; - if (SUCCEEDED(device->CreateDescriptorHeap(&new_heap_desc, - IID_PPV_ARGS(&new_heap)))) { - SRVDescriptorCachePage new_page; - new_page.heap = new_heap; - new_page.heap_start = new_heap->GetCPUDescriptorHandleForHeapStart(); - new_page.current_usage = 1; - cached_handle_available = true; - cached_handle = new_page.heap_start; - srv_descriptor_cache_.push_back(new_page); - } - } else { - SRVDescriptorCachePage& page = srv_descriptor_cache_.back(); - cached_handle_available = true; - cached_handle = - provider->OffsetViewDescriptor(page.heap_start, page.current_usage); - ++page.current_usage; - } - if (cached_handle_available) { - device->CreateShaderResourceView(resource, &desc, cached_handle); - texture->cached_srv_descriptor = cached_handle; - texture->cached_srv_descriptor_swizzle = binding.host_swizzle; - } - } - if (cached_handle_available) { - device->CopyDescriptorsSimple(1, handle, cached_handle, + { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_i( + "gpu", + "xe::gpu::d3d12::TextureCache::WriteActiveTextureBindfulSRV->" + "CopyDescriptorsSimple"); +#endif // FINE_GRAINED_DRAW_SCOPES + device->CopyDescriptorsSimple(1, handle, source_handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - } else { - device->CreateShaderResourceView(resource, &desc, handle); } } +uint32_t TextureCache::GetActiveTextureBindlessSRVIndex( + const D3D12Shader::TextureBinding& 
host_shader_binding) { + assert_true(bindless_resources_used_); + uint32_t descriptor_index = UINT32_MAX; + const TextureBinding& binding = + texture_bindings_[host_shader_binding.fetch_constant]; + if (!binding.key.IsInvalid() && + AreDimensionsCompatible(host_shader_binding.dimension, + binding.key.dimension)) { + descriptor_index = host_shader_binding.is_signed + ? binding.descriptor_index_signed + : binding.descriptor_index; + } + if (descriptor_index == UINT32_MAX) { + switch (host_shader_binding.dimension) { + case TextureDimension::k3D: + descriptor_index = + uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture3D); + break; + case TextureDimension::kCube: + descriptor_index = uint32_t( + D3D12CommandProcessor::SystemBindlessView::kNullTextureCube); + break; + default: + assert_true(host_shader_binding.dimension == TextureDimension::k1D || + host_shader_binding.dimension == TextureDimension::k2D); + descriptor_index = uint32_t( + D3D12CommandProcessor::SystemBindlessView::kNullTexture2DArray); + } + } + return descriptor_index; +} + TextureCache::SamplerParameters TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { auto& regs = *register_file_; @@ -1583,12 +1574,11 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters( parameters.clamp_z = fetch.clamp_z; parameters.border_color = fetch.border_color; - uint32_t mip_min_level, mip_max_level; + uint32_t mip_min_level; texture_util::GetSubresourcesFromFetchConstant( fetch, nullptr, nullptr, nullptr, nullptr, nullptr, &mip_min_level, - &mip_max_level, binding.mip_filter); + nullptr, binding.mip_filter); parameters.mip_min_level = mip_min_level; - parameters.mip_max_level = std::max(mip_max_level, mip_min_level); AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst ? 
fetch.aniso_filter @@ -1675,7 +1665,8 @@ void TextureCache::WriteSampler(SamplerParameters parameters, desc.BorderColor[3] = 0.0f; } desc.MinLOD = float(parameters.mip_min_level); - desc.MaxLOD = float(parameters.mip_max_level); + // Maximum mip level is in the texture resource itself. + desc.MaxLOD = FLT_MAX; auto device = command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); device->CreateSampler(&desc, handle); @@ -1737,8 +1728,8 @@ bool TextureCache::TileResolvedTexture( resolve_tile_mode_info_[uint32_t(resolve_tile_mode)]; auto command_list = command_processor_->GetDeferredCommandList(); - auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); uint32_t resolution_scale_log2 = IsResolutionScale2X() ? 1 : 0; texture_base &= 0x1FFFFFFF; @@ -1811,12 +1802,8 @@ bool TextureCache::TileResolvedTexture( } // Tile the texture. - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2, - descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2]; + if (!command_processor_->RequestOneUseSingleViewDescriptors(2, descriptors)) { return false; } if (resolution_scale_log2) { @@ -1826,19 +1813,15 @@ bool TextureCache::TileResolvedTexture( } command_processor_->SubmitBarriers(); command_list->D3DSetComputeRootSignature(resolve_tile_root_signature_); + ResolveTileConstants resolve_tile_constants; - resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) | - (resolution_scale_log2 << 9) | - ((texture_pitch >> 5) << 10) | - (is_3d ? 
((texture_height >> 5) << 19) : 0); - resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10); - resolve_tile_constants.size = resolve_width | (resolve_height << 16); - resolve_tile_constants.host_base = uint32_t(footprint.Offset); - resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch); - ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, buffer, + + // TODO(Triang3l): Use precreated bindless descriptors here after overall + // cleanup/optimization involving typed buffers. + ui::d3d12::util::CreateRawBufferSRV(device, descriptors[1].first, buffer, buffer_size); - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav = - provider->OffsetViewDescriptor(descriptor_cpu_start, 1); + command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second); + if (resolve_tile_mode_info.typed_uav_format != DXGI_FORMAT_UNKNOWN) { // Not sure if this alignment is actually needed in Direct3D 12, but for // safety. Also not using the full 512 MB buffer as a typed UAV because @@ -1862,22 +1845,32 @@ bool TextureCache::TileResolvedTexture( device->CreateUnorderedAccessView(resolution_scale_log2 ? 
scaled_resolve_buffer_ : shared_memory_->GetBuffer(), - nullptr, &uav_desc, descriptor_cpu_uav); + nullptr, &uav_desc, descriptors[0].first); } else { if (resolution_scale_log2) { resolve_tile_constants.guest_base = texture_base & 0xFFF; CreateScaledResolveBufferRawUAV( - descriptor_cpu_uav, texture_base >> 12, + descriptors[0].first, texture_base >> 12, ((texture_base + texture_size - 1) >> 12) - (texture_base >> 12) + 1); } else { resolve_tile_constants.guest_base = texture_base; - shared_memory_->WriteRawUAVDescriptor(descriptor_cpu_uav); + shared_memory_->WriteRawUAVDescriptor(descriptors[0].first); } } - command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second); + + resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) | + (resolution_scale_log2 << 9) | + ((texture_pitch >> 5) << 10) | + (is_3d ? ((texture_height >> 5) << 19) : 0); + resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10); + resolve_tile_constants.size = resolve_width | (resolve_height << 16); + resolve_tile_constants.host_base = uint32_t(footprint.Offset); + resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch); command_list->D3DSetComputeRoot32BitConstants( 0, sizeof(resolve_tile_constants) / sizeof(uint32_t), &resolve_tile_constants, 0); + command_processor_->SetComputePipeline( resolve_tile_pipelines_[uint32_t(resolve_tile_mode)]); // Each group processes 32x32 texels after resolution scaling has been @@ -2339,8 +2332,6 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { } texture->base_watch_handle = nullptr; texture->mip_watch_handle = nullptr; - texture->cached_srv_descriptor_swizzle = - Texture::kCachedSRVDescriptorSwizzleMissing; textures_.insert(std::make_pair(map_key, texture)); COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); textures_total_size_ += texture->resource_size; @@ -2364,8 
+2355,8 @@ bool TextureCache::LoadTextureData(Texture* texture) { } auto command_list = command_processor_->GetDeferredCommandList(); - auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); - auto device = provider->GetDevice(); + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); // Get the pipeline. LoadMode load_mode = GetLoadMode(texture->key); @@ -2453,16 +2444,19 @@ bool TextureCache::LoadTextureData(Texture* texture) { // descriptors for base and mips. bool separate_base_and_mips_descriptors = scaled_resolve && mip_first == 0 && mip_last != 0; + // TODO(Triang3l): Use precreated bindless descriptors here after overall + // cleanup/optimization involving typed buffers. uint32_t descriptor_count = separate_base_and_mips_descriptors ? 4 : 2; - D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; - D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; - if (command_processor_->RequestViewDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, descriptor_count, - descriptor_count, descriptor_cpu_start, descriptor_gpu_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { - command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); + ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[4]; + if (!command_processor_->RequestOneUseSingleViewDescriptors(descriptor_count, + descriptors)) { return false; } + // Create two destination descriptors since the table has both. 
+ for (uint32_t i = 0; i < descriptor_count; i += 2) { + ui::d3d12::util::CreateRawBufferUAV(device, descriptors[i].first, + copy_buffer, uint32_t(host_slice_size)); + } if (scaled_resolve) { // TODO(Triang3l): Allow partial invalidation of scaled textures - send a // part of scaled_resolve_pages_ to the shader and choose the source @@ -2470,35 +2464,28 @@ bool TextureCache::LoadTextureData(Texture* texture) { // it's not, duplicate the texels from the unscaled version - will be // blocky with filtering, but better than nothing. UseScaledResolveBufferForReading(); - uint32_t srv_descriptor_offset = 0; + uint32_t source_descriptor_index = 1; if (mip_first == 0) { CreateScaledResolveBufferRawSRV( - provider->OffsetViewDescriptor(descriptor_cpu_start, - srv_descriptor_offset), - texture->key.base_page, (texture->base_size + 0xFFF) >> 12); - srv_descriptor_offset += 2; + descriptors[source_descriptor_index].first, texture->key.base_page, + (texture->base_size + 0xFFF) >> 12); + source_descriptor_index += 2; } if (mip_last != 0) { CreateScaledResolveBufferRawSRV( - provider->OffsetViewDescriptor(descriptor_cpu_start, - srv_descriptor_offset), - texture->key.mip_page, (texture->mip_size + 0xFFF) >> 12); + descriptors[source_descriptor_index].first, texture->key.mip_page, + (texture->mip_size + 0xFFF) >> 12); } } else { shared_memory_->UseForReading(); - shared_memory_->WriteRawSRVDescriptor(descriptor_cpu_start); - } - // Create two destination descriptors since the table has both. - for (uint32_t i = 1; i < descriptor_count; i += 2) { - ui::d3d12::util::CreateRawBufferUAV( - device, provider->OffsetViewDescriptor(descriptor_cpu_start, i), - copy_buffer, uint32_t(host_slice_size)); + shared_memory_->WriteRawSRVDescriptor(descriptors[1].first); } command_processor_->SetComputePipeline(pipeline); command_list->D3DSetComputeRootSignature(load_root_signature_); if (!separate_base_and_mips_descriptors) { - // Will be bound later. 
- command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); + // Will be bound later if separate base and mip descriptors. + command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second); + command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second); } // Submit commands. @@ -2575,14 +2562,11 @@ bool TextureCache::LoadTextureData(Texture* texture) { } std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants)); command_list->D3DSetComputeRootConstantBufferView(0, cbuffer_gpu_address); - if (separate_base_and_mips_descriptors) { - if (j == 0) { - command_list->D3DSetComputeRootDescriptorTable(1, - descriptor_gpu_start); - } else if (j == 1) { - command_list->D3DSetComputeRootDescriptorTable( - 1, provider->OffsetViewDescriptor(descriptor_gpu_start, 2)); - } + if (separate_base_and_mips_descriptors && j <= 1) { + command_list->D3DSetComputeRootDescriptorTable( + 2, descriptors[j * 2 + 1].second); + command_list->D3DSetComputeRootDescriptorTable( + 1, descriptors[j * 2].second); } command_processor_->SubmitBarriers(); // Each thread group processes 32x32x1 blocks after resolution scaling has @@ -2642,6 +2626,138 @@ bool TextureCache::LoadTextureData(Texture* texture) { return true; } +uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture, + bool is_signed, + uint32_t host_swizzle) { + uint32_t descriptor_key = uint32_t(is_signed) | (host_swizzle << 1); + + // Try to find an existing descriptor. + auto it = texture.srv_descriptors.find(descriptor_key); + if (it != texture.srv_descriptors.end()) { + return it->second; + } + + // Create a new bindless or cached descriptor if supported. + D3D12_SHADER_RESOURCE_VIEW_DESC desc; + + TextureFormat format = texture.key.format; + if (IsSignedVersionSeparate(format) && + texture.key.signed_separate != uint32_t(is_signed)) { + // Not the version with the needed signedness. 
+ return UINT32_MAX; + } + if (is_signed) { + // Not supporting signed compressed textures - hopefully DXN and DXT5A are + // not used as signed. + desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm; + } else { + desc.Format = GetDXGIUnormFormat(texture.key); + } + if (desc.Format == DXGI_FORMAT_UNKNOWN) { + unsupported_format_features_used_[uint32_t(format)] |= + is_signed ? kUnsupportedSnormBit : kUnsupportedUnormBit; + return UINT32_MAX; + } + + uint32_t mip_levels = texture.key.mip_max_level + 1; + switch (texture.key.dimension) { + case Dimension::k1D: + case Dimension::k2D: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + desc.Texture2DArray.MostDetailedMip = 0; + desc.Texture2DArray.MipLevels = mip_levels; + desc.Texture2DArray.FirstArraySlice = 0; + desc.Texture2DArray.ArraySize = texture.key.depth; + desc.Texture2DArray.PlaneSlice = 0; + desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + break; + case Dimension::k3D: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + desc.Texture3D.MostDetailedMip = 0; + desc.Texture3D.MipLevels = mip_levels; + desc.Texture3D.ResourceMinLODClamp = 0.0f; + break; + case Dimension::kCube: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + desc.TextureCube.MostDetailedMip = 0; + desc.TextureCube.MipLevels = mip_levels; + desc.TextureCube.ResourceMinLODClamp = 0.0f; + break; + default: + assert_unhandled_case(texture.key.dimension); + return UINT32_MAX; + } + + desc.Shader4ComponentMapping = + host_swizzle | + D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES; + + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + uint32_t descriptor_index; + if (bindless_resources_used_) { + descriptor_index = + command_processor_->RequestPersistentViewBindlessDescriptor(); + if (descriptor_index == UINT32_MAX) { + XELOGE( + "Failed to create a texture descriptor - no free bindless view " + "descriptors"); + return UINT32_MAX; + } + } else { + if 
(!srv_descriptor_cache_free_.empty()) { + descriptor_index = srv_descriptor_cache_free_.back(); + srv_descriptor_cache_free_.pop_back(); + } else { + // Allocated + 1 (including the descriptor that is being added), rounded + // up to SRVDescriptorCachePage::kHeapSize, (allocated + 1 + size - 1). + uint32_t cache_pages_needed = (srv_descriptor_cache_allocated_ + + SRVDescriptorCachePage::kHeapSize) / + SRVDescriptorCachePage::kHeapSize; + if (srv_descriptor_cache_.size() < cache_pages_needed) { + D3D12_DESCRIPTOR_HEAP_DESC cache_heap_desc; + cache_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + cache_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize; + cache_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + cache_heap_desc.NodeMask = 0; + while (srv_descriptor_cache_.size() < cache_pages_needed) { + SRVDescriptorCachePage cache_page; + if (FAILED(device->CreateDescriptorHeap( + &cache_heap_desc, IID_PPV_ARGS(&cache_page.heap)))) { + XELOGE( + "Failed to create a texture descriptor - couldn't create a " + "descriptor cache heap"); + return UINT32_MAX; + } + cache_page.heap_start = + cache_page.heap->GetCPUDescriptorHandleForHeapStart(); + srv_descriptor_cache_.push_back(cache_page); + } + } + descriptor_index = srv_descriptor_cache_allocated_++; + } + } + device->CreateShaderResourceView( + texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); + texture.srv_descriptors.insert({descriptor_key, descriptor_index}); + return descriptor_index; +} + +D3D12_CPU_DESCRIPTOR_HANDLE TextureCache::GetTextureDescriptorCPUHandle( + uint32_t descriptor_index) const { + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + if (bindless_resources_used_) { + return provider->OffsetViewDescriptor( + command_processor_->GetViewBindlessHeapCPUStart(), descriptor_index); + } + D3D12_CPU_DESCRIPTOR_HANDLE heap_start = + srv_descriptor_cache_[descriptor_index / + SRVDescriptorCachePage::kHeapSize] + .heap_start; + uint32_t 
heap_offset = descriptor_index % SRVDescriptorCachePage::kHeapSize; + return provider->OffsetViewDescriptor(heap_start, heap_offset); +} + void TextureCache::MarkTextureUsed(Texture* texture) { uint64_t current_frame = command_processor_->GetCurrentFrame(); // This is called very frequently, don't relink unless needed for caching. @@ -2687,8 +2803,10 @@ void TextureCache::WatchCallback(Texture* texture, bool is_mip) { } void TextureCache::ClearBindings() { - std::memset(texture_bindings_, 0, sizeof(texture_bindings_)); - texture_keys_in_sync_ = 0; + for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) { + texture_bindings_[i].Clear(); + } + texture_bindings_in_sync_ = 0; // Already reset everything. texture_invalidated_.store(false, std::memory_order_relaxed); } diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 18a7e3741..49c0e807d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -11,7 +11,9 @@ #define XENIA_GPU_D3D12_TEXTURE_CACHE_H_ #include +#include #include +#include #include "xenia/base/mutex.h" #include "xenia/gpu/d3d12/d3d12_shader.h" @@ -55,9 +57,84 @@ class D3D12CommandProcessor; // MipAddress but no BaseAddress to save memory because textures are streamed // this way anyway. class TextureCache { + union TextureKey { + struct { + // Physical 4 KB page with the base mip level, disregarding A/C/E address + // range prefix. + uint32_t base_page : 17; // 17 total + Dimension dimension : 2; // 19 + uint32_t width : 13; // 32 + + uint32_t height : 13; // 45 + uint32_t tiled : 1; // 46 + uint32_t packed_mips : 1; // 47 + // Physical 4 KB page with mip 1 and smaller. + uint32_t mip_page : 17; // 64 + + // Layers for stacked and 3D, 6 for cube, 1 for other dimensions. 
+ uint32_t depth : 10; // 74 + uint32_t mip_max_level : 4; // 78 + TextureFormat format : 6; // 84 + Endian endianness : 2; // 86 + // Whether this texture is signed and has a different host representation + // than an unsigned view of the same guest texture. + uint32_t signed_separate : 1; // 87 + // Whether this texture is a 2x-scaled resolve target. + uint32_t scaled_resolve : 1; // 88 + }; + struct { + // The key used for unordered_multimap lookup. Single uint32_t instead of + // a uint64_t so XXH hash can be calculated in a stable way due to no + // padding. + uint32_t map_key[2]; + // The key used to identify one texture within unordered_multimap buckets. + uint32_t bucket_key; + }; + TextureKey() { MakeInvalid(); } + TextureKey(const TextureKey& key) { + SetMapKey(key.GetMapKey()); + bucket_key = key.bucket_key; + } + TextureKey& operator=(const TextureKey& key) { + SetMapKey(key.GetMapKey()); + bucket_key = key.bucket_key; + return *this; + } + bool operator==(const TextureKey& key) const { + return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key; + } + bool operator!=(const TextureKey& key) const { + return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key; + } + inline uint64_t GetMapKey() const { + return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32); + } + inline void SetMapKey(uint64_t key) { + map_key[0] = uint32_t(key); + map_key[1] = uint32_t(key >> 32); + } + inline bool IsInvalid() const { + // Zero base and zero width is enough for a binding to be invalid. + return map_key[0] == 0; + } + inline void MakeInvalid() { + // Reset all for a stable hash. + SetMapKey(0); + bucket_key = 0; + } + }; + public: + // Keys that can be stored for checking validity whether descriptors for host + // shader bindings are up to date. 
+ struct TextureSRVKey { + TextureKey key; + uint32_t host_swizzle; + uint8_t swizzled_signs; + }; + // Sampler parameters that can be directly converted to a host sampler or used - // for binding hashing. + // for binding checking validity whether samplers are up to date. union SamplerParameters { struct { ClampMode clamp_x : 3; // 3 @@ -70,7 +147,7 @@ class TextureCache { uint32_t mip_linear : 1; // 14 AnisoFilter aniso_filter : 3; // 17 uint32_t mip_min_level : 4; // 21 - uint32_t mip_max_level : 4; // 25 + // Maximum mip level is in the texture resource itself. }; uint32_t value; @@ -91,7 +168,8 @@ class TextureCache { }; TextureCache(D3D12CommandProcessor* command_processor, - RegisterFile* register_file, SharedMemory* shared_memory); + RegisterFile* register_file, bool bindless_resources_used, + SharedMemory* shared_memory); ~TextureCache(); bool Initialize(bool edram_rov_used); @@ -109,19 +187,33 @@ class TextureCache { // binding the actual drawing pipeline. void RequestTextures(uint32_t used_texture_mask); - // Returns the hash of the current bindings (must be called after - // RequestTextures) for the provided SRV descriptor layout. - uint64_t GetDescriptorHashForActiveTextures( - const D3D12Shader::TextureSRV* texture_srvs, - uint32_t texture_srv_count) const; + // "ActiveTexture" means as of the latest RequestTextures call. + + // Returns whether texture SRV keys stored externally are still valid for the + // current bindings and host shader binding layout. Both keys and + // host_shader_bindings must have host_shader_binding_count elements + // (otherwise they are incompatible - like if this function returned false). + bool AreActiveTextureSRVKeysUpToDate( + const TextureSRVKey* keys, + const D3D12Shader::TextureBinding* host_shader_bindings, + uint32_t host_shader_binding_count) const; + // Exports the current binding data to texture SRV keys so they can be stored + // for checking whether subsequent draw calls can keep using the same + // bindings. 
Write host_shader_binding_count keys. + void WriteActiveTextureSRVKeys( + TextureSRVKey* keys, + const D3D12Shader::TextureBinding* host_shader_bindings, + uint32_t host_shader_binding_count) const; // Returns the post-swizzle signedness of a currently bound texture (must be // called after RequestTextures). uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const { return texture_bindings_[index].swizzled_signs; } - - void WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv, - D3D12_CPU_DESCRIPTOR_HANDLE handle); + void WriteActiveTextureBindfulSRV( + const D3D12Shader::TextureBinding& host_shader_binding, + D3D12_CPU_DESCRIPTOR_HANDLE handle); + uint32_t GetActiveTextureBindlessSRVIndex( + const D3D12Shader::TextureBinding& host_shader_binding); SamplerParameters GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const; @@ -276,73 +368,6 @@ class TextureCache { uint8_t swizzle[4]; }; - union TextureKey { - struct { - // Physical 4 KB page with the base mip level, disregarding A/C/E address - // range prefix. - uint32_t base_page : 17; // 17 total - Dimension dimension : 2; // 19 - uint32_t width : 13; // 32 - - uint32_t height : 13; // 45 - uint32_t tiled : 1; // 46 - uint32_t packed_mips : 1; // 47 - // Physical 4 KB page with mip 1 and smaller. - uint32_t mip_page : 17; // 64 - - // Layers for stacked and 3D, 6 for cube, 1 for other dimensions. - uint32_t depth : 10; // 74 - uint32_t mip_max_level : 4; // 78 - TextureFormat format : 6; // 84 - Endian endianness : 2; // 86 - // Whether this texture is signed and has a different host representation - // than an unsigned view of the same guest texture. - uint32_t signed_separate : 1; // 87 - // Whether this texture is a 2x-scaled resolve target. - uint32_t scaled_resolve : 1; // 88 - }; - struct { - // The key used for unordered_multimap lookup. Single uint32_t instead of - // a uint64_t so XXH hash can be calculated in a stable way due to no - // padding. 
- uint32_t map_key[2]; - // The key used to identify one texture within unordered_multimap buckets. - uint32_t bucket_key; - }; - TextureKey() { MakeInvalid(); } - TextureKey(const TextureKey& key) { - SetMapKey(key.GetMapKey()); - bucket_key = key.bucket_key; - } - TextureKey& operator=(const TextureKey& key) { - SetMapKey(key.GetMapKey()); - bucket_key = key.bucket_key; - return *this; - } - bool operator==(const TextureKey& key) const { - return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key; - } - bool operator!=(const TextureKey& key) const { - return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key; - } - inline uint64_t GetMapKey() const { - return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32); - } - inline void SetMapKey(uint64_t key) { - map_key[0] = uint32_t(key); - map_key[1] = uint32_t(key >> 32); - } - inline bool IsInvalid() const { - // Zero base and zero width is enough for a binding to be invalid. - return map_key[0] == 0; - } - inline void MakeInvalid() { - // Reset all for a stable hash. - SetMapKey(0); - bucket_key = 0; - } - }; - struct Texture { TextureKey key; ID3D12Resource* resource; @@ -367,13 +392,11 @@ class TextureCache { // Row pitches on each mip level (for linear layout mainly). uint32_t pitches[14]; - // SRV descriptor from the cache, for the first swizzle the texture was used - // with (which is usually determined by the format, such as RGBA or BGRA). - // If swizzle is kCachedSRVDescriptorSwizzleMissing, the cached descriptor - // doesn't exist yet (there are no invalid D3D descriptor handle values). - D3D12_CPU_DESCRIPTOR_HANDLE cached_srv_descriptor; - static constexpr uint32_t kCachedSRVDescriptorSwizzleMissing = UINT32_MAX; - uint32_t cached_srv_descriptor_swizzle; + // For bindful - indices in the non-shader-visible descriptor cache for + // copying to the shader-visible heap (much faster than recreating, which, + // according to profiling, was often a bottleneck in many games). 
+ // For bindless - indices in the global shader-visible descriptor heap. + std::unordered_map srv_descriptors; // These are to be accessed within the global critical region to synchronize // with shared memory. @@ -390,7 +413,6 @@ class TextureCache { static constexpr uint32_t kHeapSize = 65536; ID3D12DescriptorHeap* heap; D3D12_CPU_DESCRIPTOR_HANDLE heap_start; - uint32_t current_usage; }; struct LoadConstants { @@ -459,6 +481,14 @@ class TextureCache { // Signed version of the texture if the data in the signed version is // different on the host. Texture* texture_signed; + // Descriptor indices of texture and texture_signed returned from + // FindOrCreateTextureDescriptor. + uint32_t descriptor_index; + uint32_t descriptor_index_signed; + void Clear() { + std::memset(this, 0, sizeof(*this)); + descriptor_index = descriptor_index_signed = UINT32_MAX; + } }; // Whether the signed version of the texture has a different representation on @@ -505,6 +535,22 @@ class TextureCache { const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out, uint32_t* host_swizzle_out, uint8_t* swizzled_signs_out); + static constexpr bool AreDimensionsCompatible( + TextureDimension binding_dimension, Dimension resource_dimension) { + switch (binding_dimension) { + case TextureDimension::k1D: + case TextureDimension::k2D: + return resource_dimension == Dimension::k1D || + resource_dimension == Dimension::k2D; + case TextureDimension::k3D: + return resource_dimension == Dimension::k3D; + case TextureDimension::kCube: + return resource_dimension == Dimension::kCube; + default: + return false; + } + } + static void LogTextureKeyAction(TextureKey key, const char* action); static void LogTextureAction(const Texture* texture, const char* action); @@ -517,6 +563,14 @@ class TextureCache { // allocates descriptors and copies! 
bool LoadTextureData(Texture* texture); + // Returns the index of an existing or a newly created non-shader-visible + // cached (for bindful) or a shader-visible global (for bindless) descriptor, + // or UINT32_MAX if failed to create. + uint32_t FindOrCreateTextureDescriptor(Texture& texture, bool is_signed, + uint32_t host_swizzle); + D3D12_CPU_DESCRIPTOR_HANDLE GetTextureDescriptorCPUHandle( + uint32_t descriptor_index) const; + // For LRU caching - updates the last usage frame and moves the texture to // the end of the usage queue. Must be called any time the texture is // referenced by any command list to make sure it's not destroyed while still @@ -552,6 +606,7 @@ class TextureCache { D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; + bool bindless_resources_used_; SharedMemory* shared_memory_; static const LoadModeInfo load_mode_info_[]; @@ -571,8 +626,9 @@ class TextureCache { uint64_t texture_current_usage_time_; std::vector srv_descriptor_cache_; - // Cached descriptors used by deleted textures, for reuse. - std::vector srv_descriptor_cache_free_; + uint32_t srv_descriptor_cache_allocated_; + // Indices of cached descriptors used by deleted textures, for reuse. + std::vector srv_descriptor_cache_free_; enum class NullSRVDescriptorIndex { k2DArray, @@ -587,9 +643,9 @@ class TextureCache { D3D12_CPU_DESCRIPTOR_HANDLE null_srv_descriptor_heap_start_; TextureBinding texture_bindings_[32] = {}; - // Bit vector with bits reset on fetch constant writes to avoid getting - // texture keys from the fetch constants again and again. - uint32_t texture_keys_in_sync_ = 0; + // Bit vector with bits reset on fetch constant writes to avoid parsing fetch + // constants again and again.
+ uint32_t texture_bindings_in_sync_ = 0; // Whether a texture has been invalidated (a watch has been triggered), so // need to try to reload textures, disregarding whether fetch constants have diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 61a60ad8a..716e5a9a0 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -62,8 +62,8 @@ using namespace ucode; // S#/T#/U# binding index, and the second is the s#/t#/u# register index // within its space. -constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVIndexBits; -constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVs; +constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindingIndexBits; +constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindings; constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindingIndexBits; constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindings; constexpr uint32_t DxbcShaderTranslator::kInterpolatorCount; @@ -74,13 +74,16 @@ constexpr uint32_t DxbcShaderTranslator::kSwizzleXXXX; constexpr uint32_t DxbcShaderTranslator::kSwizzleYYYY; constexpr uint32_t DxbcShaderTranslator::kSwizzleZZZZ; constexpr uint32_t DxbcShaderTranslator::kSwizzleWWWW; -constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated; +constexpr uint32_t DxbcShaderTranslator::kBindingIndexUnallocated; constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone; DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id, + bool bindless_resources_used, bool edram_rov_used, bool force_emit_source_map) - : vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) { + : vendor_id_(vendor_id), + bindless_resources_used_(bindless_resources_used), + edram_rov_used_(edram_rov_used) { emit_source_map_ = force_emit_source_map || cvars::dxbc_source_map; // Don't allocate again and again for the first shader. 
shader_code_.reserve(8192); @@ -154,9 +157,10 @@ void DxbcShaderTranslator::Reset() { cbuffer_count_ = 0; // System constants always used in prologues/epilogues. cbuffer_index_system_constants_ = cbuffer_count_++; - cbuffer_index_float_constants_ = kCbufferIndexUnallocated; - cbuffer_index_bool_loop_constants_ = kCbufferIndexUnallocated; - cbuffer_index_fetch_constants_ = kCbufferIndexUnallocated; + cbuffer_index_float_constants_ = kBindingIndexUnallocated; + cbuffer_index_bool_loop_constants_ = kBindingIndexUnallocated; + cbuffer_index_fetch_constants_ = kBindingIndexUnallocated; + cbuffer_index_descriptor_indices_ = kBindingIndexUnallocated; system_constants_used_ = 0; @@ -172,7 +176,19 @@ void DxbcShaderTranslator::Reset() { cf_instruction_predicate_if_open_ = false; cf_exec_predicate_written_ = false; - texture_srvs_.clear(); + srv_count_ = 0; + srv_index_shared_memory_ = kBindingIndexUnallocated; + srv_index_bindless_textures_2d_ = kBindingIndexUnallocated; + srv_index_bindless_textures_3d_ = kBindingIndexUnallocated; + srv_index_bindless_textures_cube_ = kBindingIndexUnallocated; + + texture_bindings_.clear(); + texture_bindings_for_bindful_srv_indices_.clear(); + + uav_count_ = 0; + uav_index_shared_memory_ = kBindingIndexUnallocated; + uav_index_edram_ = kBindingIndexUnallocated; + sampler_bindings_.clear(); memexport_alloc_current_count_ = 0; @@ -1369,7 +1385,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand( } } break; case InstructionStorageSource::kConstantFloat: { - if (cbuffer_index_float_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_float_constants_ == kBindingIndexUnallocated) { cbuffer_index_float_constants_ = cbuffer_count_++; } if (operand.storage_addressing_mode == @@ -1600,7 +1616,7 @@ void DxbcShaderTranslator::UpdateExecConditionalsAndEmitDisassembly( if (type == ParsedExecInstruction::Type::kConditional) { uint32_t bool_constant_test_temp = PushSystemTemp(); // Check the bool constant value. 
- if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } DxbcOpAnd(DxbcDest::R(bool_constant_test_temp, 0b0001), @@ -1755,7 +1771,7 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction( // Count (unsigned) in bits 0:7 of the loop constant, initial aL (unsigned) in // 8:15. Starting from vector 2 because of bool constants. - if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } DxbcSrc loop_constant_src( @@ -1843,7 +1859,7 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( uint32_t aL_add_temp = PushSystemTemp(); // Extract the value to add to aL (signed, in bits 16:23 of the loop // constant). Starting from vector 2 because of bool constants. - if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } DxbcOpIBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8), @@ -1963,6 +1979,10 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t( // kUint4Array48 {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, 48, 0, RdefTypeIndex::kUint4, nullptr}, + // kUint4DescriptorIndexArray - bindless descriptor indices - size written + // dynamically. + {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, + 0, 0, RdefTypeIndex::kUint4, nullptr}, }; const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator:: @@ -2042,22 +2062,17 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(cbuffer_count_); // Constant buffer offset (set later). shader_object_.push_back(0); - // Bound resource count (samplers, SRV, UAV, CBV). 
- uint32_t resource_count = cbuffer_count_; - if (!is_depth_only_pixel_shader_) { - // + 2 for shared memory SRV and UAV (vfetches can appear in pixel shaders - // too, and the UAV is needed for memexport, however, the choice between - // SRV and UAV is per-pipeline, not per-shader - a resource can't be in a - // read-only state (SRV, IBV) if it's in a read/write state such as UAV). - resource_count += - uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size()); - } - if (IsDxbcPixelShader() && edram_rov_used_) { - // EDRAM. - ++resource_count; + // Bindful resource count. + uint32_t resource_count = srv_count_ + uav_count_ + cbuffer_count_; + if (!sampler_bindings_.empty()) { + if (bindless_resources_used_) { + ++resource_count; + } else { + resource_count += uint32_t(sampler_bindings_.size()); + } } shader_object_.push_back(resource_count); - // Bound resource buffer offset (set later). + // Bindful resource buffer offset (set later). shader_object_.push_back(0); if (IsDxbcVertexShader()) { // vs_5_1 @@ -2119,14 +2134,20 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(uint32_t(type.variable_class) | (uint32_t(type.variable_type) << 16)); shader_object_.push_back(type.row_count | (type.column_count << 16)); - if (RdefTypeIndex(i) == RdefTypeIndex::kFloat4ConstantArray) { - // Declaring a 0-sized array may not be safe, so write something valid - // even if they aren't used. - shader_object_.push_back( - std::max(constant_register_map().float_count, uint32_t(1))); - } else { - shader_object_.push_back(type.element_count | - (type.struct_member_count << 16)); + switch (RdefTypeIndex(i)) { + case RdefTypeIndex::kFloat4ConstantArray: + // Declaring a 0-sized array may not be safe, so write something valid + // even if they aren't used. 
+ shader_object_.push_back( + std::max(constant_register_map().float_count, uint32_t(1))); + break; + case RdefTypeIndex::kUint4DescriptorIndexArray: + shader_object_.push_back(std::max( + uint32_t((GetBindlessResourceCount() + 3) >> 2), uint32_t(1))); + break; + default: + shader_object_.push_back(type.element_count | + (type.struct_member_count << 16)); } // Struct member offset (set later). shader_object_.push_back(0); @@ -2177,33 +2198,37 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) * sizeof(uint32_t); uint32_t constant_name_offsets_system[kSysConst_Count]; - if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { for (uint32_t i = 0; i < kSysConst_Count; ++i) { constant_name_offsets_system[i] = new_offset; new_offset += AppendString(shader_object_, system_constant_rdef_[i].name); } } uint32_t constant_name_offset_float = new_offset; - if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_float_constants"); } uint32_t constant_name_offset_bool = new_offset; - uint32_t constant_name_offset_loop = constant_name_offset_bool; - if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) { + uint32_t constant_name_offset_loop = new_offset; + if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_bool_constants"); constant_name_offset_loop = new_offset; new_offset += AppendString(shader_object_, "xe_loop_constants"); } uint32_t constant_name_offset_fetch = new_offset; - if (constant_name_offset_fetch != kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_fetch_constants"); } + uint32_t 
constant_name_offset_descriptor_indices = new_offset; + if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) { + new_offset += AppendString(shader_object_, "xe_descriptor_indices"); + } const uint32_t constant_size = 10 * sizeof(uint32_t); // System constants. uint32_t constant_offset_system = new_offset; - if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { uint32_t system_cbuffer_constant_offset = 0; for (uint32_t i = 0; i < kSysConst_Count; ++i) { const SystemConstantRdef& constant = system_constant_rdef_[i]; @@ -2229,12 +2254,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Float constants. uint32_t constant_offset_float = new_offset; - if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) { + assert_not_zero(constant_register_map().float_count); shader_object_.push_back(constant_name_offset_float); shader_object_.push_back(0); - shader_object_.push_back( - std::max(constant_register_map().float_count, uint32_t(1)) * 4 * - sizeof(float)); + shader_object_.push_back(constant_register_map().float_count * 4 * + sizeof(float)); shader_object_.push_back(kDxbcRdefVariableFlagUsed); shader_object_.push_back(types_offset + uint32_t(RdefTypeIndex::kFloat4ConstantArray) * @@ -2249,7 +2274,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Bool and loop constants. uint32_t constant_offset_bool_loop = new_offset; - if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) { shader_object_.push_back(constant_name_offset_bool); shader_object_.push_back(0); shader_object_.push_back(2 * 4 * sizeof(uint32_t)); @@ -2279,7 +2304,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Fetch constants. 
uint32_t constant_offset_fetch = new_offset; - if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) { shader_object_.push_back(constant_name_offset_fetch); shader_object_.push_back(0); shader_object_.push_back(32 * 6 * sizeof(uint32_t)); @@ -2294,6 +2319,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { new_offset += constant_size; } + // Bindless descriptor indices. + uint32_t constant_offset_descriptor_indices = new_offset; + if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) { + assert_not_zero(GetBindlessResourceCount()); + shader_object_.push_back(constant_name_offset_descriptor_indices); + shader_object_.push_back(0); + shader_object_.push_back( + xe::align(GetBindlessResourceCount(), uint32_t(4)) * sizeof(uint32_t)); + shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back( + types_offset + + uint32_t(RdefTypeIndex::kUint4DescriptorIndexArray) * type_size); + shader_object_.push_back(0); + shader_object_.push_back(0xFFFFFFFFu); + shader_object_.push_back(0); + shader_object_.push_back(0xFFFFFFFFu); + shader_object_.push_back(0); + new_offset += constant_size; + } + // *************************************************************************** // Constant buffers // *************************************************************************** @@ -2302,21 +2347,25 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) * sizeof(uint32_t); uint32_t cbuffer_name_offset_system = new_offset; - if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_system_cbuffer"); } uint32_t cbuffer_name_offset_float = new_offset; - if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_float_constants_ !=
kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_float_cbuffer"); } uint32_t cbuffer_name_offset_bool_loop = new_offset; - if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_bool_loop_cbuffer"); } uint32_t cbuffer_name_offset_fetch = new_offset; - if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_fetch_cbuffer"); } + uint32_t cbuffer_name_offset_descriptor_indices = new_offset; + if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) { + new_offset += AppendString(shader_object_, "xe_descriptor_indices_cbuffer"); + } // Write the offset to the header. shader_object_[chunk_position_dwords + 1] = new_offset; @@ -2333,12 +2382,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // No D3D_SHADER_CBUFFER_FLAGS. 
shader_object_.push_back(0); } else if (i == cbuffer_index_float_constants_) { + assert_not_zero(constant_register_map().float_count); shader_object_.push_back(cbuffer_name_offset_float); shader_object_.push_back(1); shader_object_.push_back(constant_offset_float); - shader_object_.push_back( - std::max(constant_register_map().float_count, uint32_t(1)) * 4 * - sizeof(float)); + shader_object_.push_back(constant_register_map().float_count * 4 * + sizeof(float)); shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); shader_object_.push_back(0); } else if (i == cbuffer_index_bool_loop_constants_) { @@ -2356,6 +2405,18 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(32 * 6 * sizeof(uint32_t)); shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); shader_object_.push_back(0); + } else if (i == cbuffer_index_descriptor_indices_) { + assert_not_zero(GetBindlessResourceCount()); + shader_object_.push_back(cbuffer_name_offset_descriptor_indices); + shader_object_.push_back(1); + shader_object_.push_back(constant_offset_descriptor_indices); + shader_object_.push_back( + xe::align(GetBindlessResourceCount(), uint32_t(4)) * + sizeof(uint32_t)); + shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + shader_object_.push_back(0); + } else { + assert_unhandled_case(i); } } @@ -2367,138 +2428,219 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // their names already. 
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) * sizeof(uint32_t); - uint32_t sampler_name_offset = 0; - uint32_t shared_memory_srv_name_offset = 0; - uint32_t texture_name_offset = 0; - uint32_t shared_memory_uav_name_offset = 0; - if (!is_depth_only_pixel_shader_) { - sampler_name_offset = new_offset; - for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { - new_offset += - AppendString(shader_object_, sampler_bindings_[i].name.c_str()); + uint32_t sampler_name_offset = new_offset; + if (!sampler_bindings_.empty()) { + if (bindless_resources_used_) { + new_offset += AppendString(shader_object_, "xe_samplers"); + } else { + for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { + new_offset += + AppendString(shader_object_, sampler_bindings_[i].name.c_str()); + } } - shared_memory_srv_name_offset = new_offset; + } + uint32_t shared_memory_srv_name_offset = new_offset; + if (srv_index_shared_memory_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_shared_memory_srv"); - texture_name_offset = new_offset; - for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { - new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str()); + } + uint32_t bindless_textures_2d_name_offset = new_offset; + uint32_t bindless_textures_3d_name_offset = new_offset; + uint32_t bindless_textures_cube_name_offset = new_offset; + if (bindless_resources_used_) { + if (srv_index_bindless_textures_2d_ != kBindingIndexUnallocated) { + bindless_textures_2d_name_offset = new_offset; + new_offset += AppendString(shader_object_, "xe_textures_2d"); } - shared_memory_uav_name_offset = new_offset; + if (srv_index_bindless_textures_3d_ != kBindingIndexUnallocated) { + bindless_textures_3d_name_offset = new_offset; + new_offset += AppendString(shader_object_, "xe_textures_3d"); + } + if (srv_index_bindless_textures_cube_ != kBindingIndexUnallocated) { + bindless_textures_cube_name_offset = new_offset; + 
new_offset += AppendString(shader_object_, "xe_textures_cube"); + } + } else { + for (TextureBinding& texture_binding : texture_bindings_) { + texture_binding.bindful_srv_rdef_name_offset = new_offset; + new_offset += AppendString(shader_object_, texture_binding.name.c_str()); + } + } + uint32_t shared_memory_uav_name_offset = new_offset; + if (uav_index_shared_memory_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_shared_memory_uav"); } uint32_t edram_name_offset = new_offset; - if (IsDxbcPixelShader() && edram_rov_used_) { + if (uav_index_edram_ != kBindingIndexUnallocated) { new_offset += AppendString(shader_object_, "xe_edram"); } // Write the offset to the header. shader_object_[chunk_position_dwords + 3] = new_offset; - if (!is_depth_only_pixel_shader_) { - // Samplers. - for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { - const SamplerBinding& sampler_binding = sampler_bindings_[i]; + // Samplers. + if (!sampler_bindings_.empty()) { + if (bindless_resources_used_) { + // Bindless sampler heap. shader_object_.push_back(sampler_name_offset); shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler)); shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown)); // Multisampling not applicable. shader_object_.push_back(0); - // Register s[i]. - shader_object_.push_back(i); + // Registers s0:*. + shader_object_.push_back(0); + // Unbounded number of bindings. + shader_object_.push_back(0); + // No DxbcRdefInputFlags. + shader_object_.push_back(0); + // Register space 0. + shader_object_.push_back(0); + // Sampler ID S0. + shader_object_.push_back(0); + } else { + // Bindful samplers. 
+ uint32_t sampler_current_name_offset = sampler_name_offset; + for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { + const SamplerBinding& sampler_binding = sampler_bindings_[i]; + shader_object_.push_back(sampler_current_name_offset); + shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler)); + shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); + shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown)); + // Multisampling not applicable. + shader_object_.push_back(0); + // Register s[i]. + shader_object_.push_back(i); + // One binding. + shader_object_.push_back(1); + // No DxbcRdefInputFlags. + shader_object_.push_back(0); + // Register space 0. + shader_object_.push_back(0); + // Sampler ID S[i]. + shader_object_.push_back(i); + sampler_current_name_offset += + GetStringLength(sampler_binding.name.c_str()); + } + } + } + + // Shader resource views, sorted by binding index. + for (uint32_t i = 0; i < srv_count_; ++i) { + if (i == srv_index_shared_memory_) { + // Shared memory (when memexport isn't used in the pipeline). + shader_object_.push_back(shared_memory_srv_name_offset); + shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress)); + shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); + shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVBuffer)); + // Multisampling not applicable. + shader_object_.push_back(0); + shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); + // One binding. + shader_object_.push_back(1); + // No DxbcRdefInputFlags. + shader_object_.push_back(0); + shader_object_.push_back(uint32_t(SRVSpace::kMain)); + } else { + uint32_t texture_name_offset; + DxbcRdefDimension texture_dimension; + uint32_t texture_register; + uint32_t texture_register_count; + SRVSpace texture_register_space; + if (bindless_resources_used_) { + // Bindless texture heap. 
+ if (i == srv_index_bindless_textures_3d_) { + texture_name_offset = bindless_textures_3d_name_offset; + texture_dimension = DxbcRdefDimension::kSRVTexture3D; + texture_register_space = SRVSpace::kBindlessTextures3D; + } else if (i == srv_index_bindless_textures_cube_) { + texture_name_offset = bindless_textures_cube_name_offset; + texture_dimension = DxbcRdefDimension::kSRVTextureCube; + texture_register_space = SRVSpace::kBindlessTexturesCube; + } else { + assert_true(i == srv_index_bindless_textures_2d_); + texture_name_offset = bindless_textures_2d_name_offset; + texture_dimension = DxbcRdefDimension::kSRVTexture2DArray; + texture_register_space = SRVSpace::kBindlessTextures2DArray; + } + texture_register = 0; + texture_register_count = 0; + } else { + // Bindful texture. + auto it = texture_bindings_for_bindful_srv_indices_.find(i); + assert_true(it != texture_bindings_for_bindful_srv_indices_.end()); + uint32_t texture_binding_index = it->second; + const TextureBinding& texture_binding = + texture_bindings_[texture_binding_index]; + texture_name_offset = texture_binding.bindful_srv_rdef_name_offset; + switch (texture_binding.dimension) { + case TextureDimension::k3D: + texture_dimension = DxbcRdefDimension::kSRVTexture3D; + break; + case TextureDimension::kCube: + texture_dimension = DxbcRdefDimension::kSRVTextureCube; + break; + default: + assert_true(texture_binding.dimension == TextureDimension::k2D); + texture_dimension = DxbcRdefDimension::kSRVTexture2DArray; + } + texture_register = uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index; + texture_register_count = 1; + texture_register_space = SRVSpace::kMain; + } + shader_object_.push_back(texture_name_offset); + shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture)); + shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat)); + shader_object_.push_back(uint32_t(texture_dimension)); + // Not multisampled. 
+ shader_object_.push_back(0xFFFFFFFFu); + shader_object_.push_back(texture_register); + shader_object_.push_back(texture_register_count); + // 4-component. + shader_object_.push_back(DxbcRdefInputFlagsComponents); + shader_object_.push_back(uint32_t(texture_register_space)); + } + // SRV ID T[i]. + shader_object_.push_back(i); + } + + // Unordered access views, sorted by binding index. + for (uint32_t i = 0; i < uav_count_; ++i) { + if (i == uav_index_shared_memory_) { + // Shared memory (when memexport is used in the pipeline). + shader_object_.push_back(shared_memory_uav_name_offset); + shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress)); + shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); + shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); + // Multisampling not applicable. + shader_object_.push_back(0); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); // One binding. shader_object_.push_back(1); // No DxbcRdefInputFlags. shader_object_.push_back(0); // Register space 0. shader_object_.push_back(0); - // Sampler ID S[i]. - shader_object_.push_back(i); - sampler_name_offset += GetStringLength(sampler_binding.name.c_str()); - } - - // Shared memory (when memexport isn't used in the pipeline). - shader_object_.push_back(shared_memory_srv_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVBuffer)); - // Multisampling not applicable. - shader_object_.push_back(0); - shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); - // One binding. - shader_object_.push_back(1); - // No DxbcRdefInputFlags. - shader_object_.push_back(0); - shader_object_.push_back(uint32_t(SRVSpace::kMain)); - // SRV ID T0. 
- shader_object_.push_back(0); - - for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { - const TextureSRV& texture_srv = texture_srvs_[i]; - shader_object_.push_back(texture_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat)); - switch (texture_srv.dimension) { - case TextureDimension::k3D: - shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVTexture3D)); - break; - case TextureDimension::kCube: - shader_object_.push_back( - uint32_t(DxbcRdefDimension::kSRVTextureCube)); - break; - default: - shader_object_.push_back( - uint32_t(DxbcRdefDimension::kSRVTexture2DArray)); - } + } else if (i == uav_index_edram_) { + // EDRAM R32_UINT buffer. + shader_object_.push_back(edram_name_offset); + shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped)); + shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt)); + shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); // Not multisampled. shader_object_.push_back(0xFFFFFFFFu); - shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) + - i); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); // One binding. shader_object_.push_back(1); - // 4-component. - shader_object_.push_back(DxbcRdefInputFlagsComponents); - shader_object_.push_back(uint32_t(SRVSpace::kMain)); - // SRV ID T[1 + i] - T0 is shared memory. - shader_object_.push_back(1 + i); - texture_name_offset += GetStringLength(texture_srv.name.c_str()); + // No DxbcRdefInputFlags. + shader_object_.push_back(0); + // Register space 0. + shader_object_.push_back(0); + } else { + assert_unhandled_case(i); } - - // Shared memory (when memexport is used in the pipeline). 
- shader_object_.push_back(shared_memory_uav_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); - // Multisampling not applicable. - shader_object_.push_back(0); - shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); - // One binding. - shader_object_.push_back(1); - // No DxbcRdefInputFlags. - shader_object_.push_back(0); - // Register space 0. - shader_object_.push_back(0); - // UAV ID U0. - shader_object_.push_back(0); - } - - if (IsDxbcPixelShader() && edram_rov_used_) { - // EDRAM uint32 buffer. - shader_object_.push_back(edram_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); - // Not multisampled. - shader_object_.push_back(0xFFFFFFFFu); - shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); - // One binding. - shader_object_.push_back(1); - // No DxbcRdefInputFlags. - shader_object_.push_back(0); - // Register space 0. - shader_object_.push_back(0); - // UAV ID U1 or U0 depending on whether there's U0. - shader_object_.push_back(ROV_GetEDRAMUAVIndex()); + // UAV ID U[i]. + shader_object_.push_back(i); } // Constant buffers. 
@@ -2516,6 +2658,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { } else if (i == cbuffer_index_fetch_constants_) { shader_object_.push_back(cbuffer_name_offset_fetch); register_index = uint32_t(CbufferRegister::kFetchConstants); + } else if (i == cbuffer_index_descriptor_indices_) { + shader_object_.push_back(cbuffer_name_offset_descriptor_indices); + register_index = uint32_t(CbufferRegister::kDescriptorIndices); + } else { + assert_unhandled_case(i); } shader_object_.push_back(uint32_t(DxbcRdefInputType::kCbuffer)); shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); @@ -3180,7 +3327,8 @@ void DxbcShaderTranslator::WriteShaderCode() { // Constant buffers, from most frequenly accessed to least frequently accessed // (the order is a hint to the driver according to the DXBC header). - if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) { + assert_not_zero(constant_register_map().float_count); shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN( @@ -3196,7 +3344,7 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(constant_register_map().float_count); shader_object_.push_back(0); } - if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN( @@ -3210,7 +3358,7 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back((sizeof(SystemConstants) + 15) >> 4); shader_object_.push_back(0); } - if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) { shader_object_.push_back( 
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN( @@ -3224,7 +3372,22 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(48); shader_object_.push_back(0); } - if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) { + if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) { + assert_not_zero(GetBindlessResourceCount()); + shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | + ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN( + D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); + shader_object_.push_back(cbuffer_index_descriptor_indices_); + shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices)); + shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices)); + shader_object_.push_back((GetBindlessResourceCount() + 3) >> 2); + shader_object_.push_back(0); + } + if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) { shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN( @@ -3239,46 +3402,93 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(0); } - if (!is_depth_only_pixel_shader_) { - // Samplers. - for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { - const SamplerBinding& sampler_binding = sampler_bindings_[i]; + // Samplers. + if (!sampler_bindings_.empty()) { + if (bindless_resources_used_) { + // Bindless sampler heap. 
shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) | ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); shader_object_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3)); - shader_object_.push_back(i); - shader_object_.push_back(i); - shader_object_.push_back(i); shader_object_.push_back(0); + shader_object_.push_back(0); + shader_object_.push_back(UINT32_MAX); + shader_object_.push_back(0); + } else { + // Bindful samplers. + for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { + const SamplerBinding& sampler_binding = sampler_bindings_[i]; + shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) | + ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3)); + shader_object_.push_back(i); + shader_object_.push_back(i); + shader_object_.push_back(i); + shader_object_.push_back(0); + } } + } - // Shader resources. - // Shared memory ByteAddressBuffer. - shader_object_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); - shader_object_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3)); - shader_object_.push_back(0); - shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); - shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); - shader_object_.push_back(uint32_t(SRVSpace::kMain)); - // Textures. - for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { - const TextureSRV& texture_srv = texture_srvs_[i]; + // Shader resource views, sorted by binding index. + for (uint32_t i = 0; i < srv_count_; ++i) { + if (i == srv_index_shared_memory_) { + // Shared memory ByteAddressBuffer. 
+ shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3)); + shader_object_.push_back(srv_index_shared_memory_); + shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); + shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); + shader_object_.push_back(uint32_t(SRVSpace::kMain)); + } else { + // Texture or texture heap. D3D10_SB_RESOURCE_DIMENSION texture_srv_dimension; - switch (texture_srv.dimension) { - case TextureDimension::k3D: + uint32_t texture_register_first, texture_register_last; + SRVSpace texture_register_space; + if (bindless_resources_used_) { + // Bindless texture heap. + texture_register_first = 0; + texture_register_last = UINT32_MAX; + if (i == srv_index_bindless_textures_3d_) { texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D; - break; - case TextureDimension::kCube: + texture_register_space = SRVSpace::kBindlessTextures3D; + } else if (i == srv_index_bindless_textures_cube_) { texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE; - break; - default: + texture_register_space = SRVSpace::kBindlessTexturesCube; + } else { + assert_true(i == srv_index_bindless_textures_2d_); texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY; + texture_register_space = SRVSpace::kBindlessTextures2DArray; + } + } else { + // Bindful texture. 
+ auto it = texture_bindings_for_bindful_srv_indices_.find(i); + assert_true(it != texture_bindings_for_bindful_srv_indices_.end()); + uint32_t texture_binding_index = it->second; + const TextureBinding& texture_binding = + texture_bindings_[texture_binding_index]; + switch (texture_binding.dimension) { + case TextureDimension::k3D: + texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D; + break; + case TextureDimension::kCube: + texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE; + break; + default: + assert_true(texture_binding.dimension == TextureDimension::k2D); + texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY; + } + texture_register_first = texture_register_last = + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index; + texture_register_space = SRVSpace::kMain; } shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) | @@ -3286,54 +3496,55 @@ void DxbcShaderTranslator::WriteShaderCode() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_object_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3)); - // T0 is shared memory. - shader_object_.push_back(1 + i); - shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) + - i); - shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) + - i); + shader_object_.push_back(i); + shader_object_.push_back(texture_register_first); + shader_object_.push_back(texture_register_last); shader_object_.push_back( ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 0) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 1) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 2) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 3)); - shader_object_.push_back(uint32_t(SRVSpace::kMain)); + shader_object_.push_back(uint32_t(texture_register_space)); } } - // Unordered access views. 
- if (!is_depth_only_pixel_shader_) { - // Shared memory RWByteAddressBuffer. - shader_object_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE( - D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); - shader_object_.push_back(EncodeVectorSwizzledOperand( - D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); - shader_object_.push_back(0); - shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); - shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); - shader_object_.push_back(0); - } - if (IsDxbcPixelShader() && edram_rov_used_) { - // EDRAM uint32 rasterizer-ordered buffer. - shader_object_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE( - D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) | - ENCODE_D3D10_SB_RESOURCE_DIMENSION(D3D10_SB_RESOURCE_DIMENSION_BUFFER) | - D3D11_SB_RASTERIZER_ORDERED_ACCESS | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_object_.push_back(EncodeVectorSwizzledOperand( - D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); - shader_object_.push_back(ROV_GetEDRAMUAVIndex()); - shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); - shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); - shader_object_.push_back( - ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) | - ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) | - ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) | - ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3)); - shader_object_.push_back(0); + // Unordered access views, sorted by binding index. + for (uint32_t i = 0; i < uav_count_; ++i) { + if (i == uav_index_shared_memory_) { + // Shared memory RWByteAddressBuffer. 
+ shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE( + D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); + shader_object_.push_back(uav_index_shared_memory_); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); + shader_object_.push_back(0); + } else if (i == uav_index_edram_) { + // EDRAM buffer R32_UINT rasterizer-ordered view. + shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE( + D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) | + ENCODE_D3D10_SB_RESOURCE_DIMENSION( + D3D10_SB_RESOURCE_DIMENSION_BUFFER) | + D3D11_SB_RASTERIZER_ORDERED_ACCESS | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); + shader_object_.push_back(uav_index_edram_); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); + shader_object_.push_back( + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3)); + shader_object_.push_back(0); + } else { + assert_unhandled_case(i); + } } // Inputs and outputs. diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 1512876da..34286535c 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -101,8 +101,8 @@ namespace gpu { // 0 for NaN. 
class DxbcShaderTranslator : public ShaderTranslator { public: - DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used, - bool force_emit_source_map = false); + DxbcShaderTranslator(uint32_t vendor_id, bool bindless_resources_used, + bool edram_rov_used, bool force_emit_source_map = false); ~DxbcShaderTranslator() override; // Constant buffer bindings in space 0. @@ -111,6 +111,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kFloatConstants, kBoolLoopConstants, kFetchConstants, + kDescriptorIndices, }; // Some are referenced in xenos_draw.hlsli - check it too when updating! @@ -331,30 +332,39 @@ class DxbcShaderTranslator : public ShaderTranslator { enum class SRVSpace { // SRVMainSpaceRegister t# layout. kMain, + kBindlessTextures2DArray, + kBindlessTextures3D, + kBindlessTexturesCube, }; // Shader resource view bindings in SRVSpace::kMain. enum class SRVMainRegister { kSharedMemory, - kBoundTexturesStart, + kBindfulTexturesStart, }; // 192 textures at most because there are 32 fetch constants, and textures can // be 2D array, 3D or cube, and also signed and unsigned. - static constexpr uint32_t kMaxTextureSRVIndexBits = 8; - static constexpr uint32_t kMaxTextureSRVs = - (1 << kMaxTextureSRVIndexBits) - 1; - struct TextureSRV { + static constexpr uint32_t kMaxTextureBindingIndexBits = 8; + static constexpr uint32_t kMaxTextureBindings = + (1 << kMaxTextureBindingIndexBits) - 1; + struct TextureBinding { + uint32_t bindful_srv_index; + // Temporary for WriteResourceDefinitions. + uint32_t bindful_srv_rdef_name_offset; + uint32_t bindless_descriptor_index; uint32_t fetch_constant; + // Stacked and 3D are separate TextureBindings, even for bindless for null + // descriptor handling simplicity. TextureDimension dimension; bool is_signed; std::string name; }; - // The first binding returned is at t[SRVMainRegister::kBoundTexturesStart] + // The first binding returned is at t[SRVMainRegister::kBindfulTexturesStart] // of space SRVSpace::kMain. 
- const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { - count_out = uint32_t(texture_srvs_.size()); - return texture_srvs_.data(); + const TextureBinding* GetTextureBindings(uint32_t& count_out) const { + count_out = uint32_t(texture_bindings_.size()); + return texture_bindings_.data(); } // Arbitrary limit - there can't be more than 2048 in a shader-visible @@ -369,6 +379,7 @@ class DxbcShaderTranslator : public ShaderTranslator { static constexpr uint32_t kMaxSamplerBindings = (1 << kMaxSamplerBindingIndexBits) - 1; struct SamplerBinding { + uint32_t bindless_descriptor_index; uint32_t fetch_constant; TextureFilter mag_filter; TextureFilter min_filter; @@ -381,6 +392,12 @@ class DxbcShaderTranslator : public ShaderTranslator { return sampler_bindings_.data(); } + // Returns the number of texture SRV and sampler offsets that need to be + // passed via a constant buffer to the shader. + uint32_t GetBindlessResourceCount() const { + return uint32_t(texture_bindings_.size() + sampler_bindings_.size()); + } + // Unordered access view bindings in space 0. enum class UAVRegister { kSharedMemory, @@ -2144,11 +2161,6 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t piece_temp_component, uint32_t accumulator_temp, uint32_t accumulator_temp_component); - inline uint32_t ROV_GetEDRAMUAVIndex() const { - // xe_edram is U1 when there's xe_shared_memory_uav which is U0, but when - // there's no xe_shared_memory_uav, it's U0. - return is_depth_only_pixel_shader_ ? 0 : 1; - } // Whether it's possible and worth skipping running the translated shader for // 2x2 quads. 
bool ROV_IsDepthStencilEarly() const { @@ -2328,19 +2340,19 @@ class DxbcShaderTranslator : public ShaderTranslator { void CloseInstructionPredication(); void JumpToLabel(uint32_t address); - DxbcSrc FindOrAddTextureSRV(uint32_t fetch_constant, - TextureDimension dimension, bool is_signed); - DxbcSrc FindOrAddSamplerBinding(uint32_t fetch_constant, - TextureFilter mag_filter, - TextureFilter min_filter, - TextureFilter mip_filter, - AnisoFilter aniso_filter); + uint32_t FindOrAddTextureBinding(uint32_t fetch_constant, + TextureDimension dimension, bool is_signed); + uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant, + TextureFilter mag_filter, + TextureFilter min_filter, + TextureFilter mip_filter, + AnisoFilter aniso_filter); // Marks fetch constants as used by the DXBC shader and returns DxbcSrc // for the words 01 (pair 0), 23 (pair 1) or 45 (pair 2) of the texture fetch // constant. DxbcSrc RequestTextureFetchConstantWordPair(uint32_t fetch_constant_index, uint32_t pair_index) { - if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) { cbuffer_index_fetch_constants_ = cbuffer_count_++; } uint32_t total_pair_index = fetch_constant_index * 3 + pair_index; @@ -2392,6 +2404,9 @@ class DxbcShaderTranslator : public ShaderTranslator { // Vendor ID of the GPU manufacturer, for toggling unsupported features. uint32_t vendor_id_; + // Whether textures and samplers should be bindless. + bool bindless_resources_used_; + // Whether the output merger should be emulated in pixel shaders. bool edram_rov_used_; @@ -2422,6 +2437,8 @@ class DxbcShaderTranslator : public ShaderTranslator { kUint4Array8, // Fetch constants. kUint4Array48, + // Descriptor indices - size written dynamically. 
+ kUint4DescriptorIndexArray, kCount, kUnknown = kCount @@ -2448,14 +2465,16 @@ class DxbcShaderTranslator : public ShaderTranslator { }; static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)]; + static constexpr uint32_t kBindingIndexUnallocated = UINT32_MAX; + // Number of constant buffer bindings used in this shader - also used for // generation of indices of constant buffers that are optional. uint32_t cbuffer_count_; - static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX; uint32_t cbuffer_index_system_constants_; uint32_t cbuffer_index_float_constants_; uint32_t cbuffer_index_bool_loop_constants_; uint32_t cbuffer_index_fetch_constants_; + uint32_t cbuffer_index_descriptor_indices_; struct SystemConstantRdef { const char* name; @@ -2582,7 +2601,24 @@ class DxbcShaderTranslator : public ShaderTranslator { // predicate condition anymore. bool cf_exec_predicate_written_; - std::vector texture_srvs_; + // Number of SRV resources used in this shader - also used for generation of + // indices of SRV resources that are optional. + uint32_t srv_count_; + uint32_t srv_index_shared_memory_; + uint32_t srv_index_bindless_textures_2d_; + uint32_t srv_index_bindless_textures_3d_; + uint32_t srv_index_bindless_textures_cube_; + + std::vector texture_bindings_; + std::unordered_map + texture_bindings_for_bindful_srv_indices_; + + // Number of UAV resources used in this shader - also used for generation of + // indices of UAV resources that are optional. + uint32_t uav_count_; + uint32_t uav_index_shared_memory_; + uint32_t uav_index_edram_; + std::vector sampler_bindings_; // Number of `alloc export`s encountered so far in the translation. 
The index diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 9f56e3333..20e753e6c 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -44,7 +44,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // Create a 2-component DxbcSrc for the fetch constant (vf0 is in [0].xy of // the fetch constants array, vf1 is in [0].zw, vf2 is in [1].xy). - if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) { + if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) { cbuffer_index_fetch_constants_ = cbuffer_count_++; } DxbcSrc fetch_constant_src(DxbcSrc::CB( @@ -135,13 +135,21 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( .Select(kSysConst_Flags_Comp), DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV)); DxbcOpIf(false, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + if (srv_index_shared_memory_ == kBindingIndexUnallocated) { + srv_index_shared_memory_ = srv_count_++; + } + if (uav_index_shared_memory_ == kBindingIndexUnallocated) { + uav_index_shared_memory_ = uav_count_++; + } for (uint32_t i = 0; i < 2; ++i) { if (i) { DxbcOpElse(); } DxbcSrc shared_memory_src( - i ? DxbcSrc::U(0, uint32_t(UAVRegister::kSharedMemory)) - : DxbcSrc::T(0, uint32_t(SRVMainRegister::kSharedMemory))); + i ? 
DxbcSrc::U(uav_index_shared_memory_, + uint32_t(UAVRegister::kSharedMemory)) + : DxbcSrc::T(srv_index_shared_memory_, + uint32_t(SRVMainRegister::kSharedMemory))); uint32_t needed_words_remaining = needed_words; uint32_t word_index_previous = first_word_index; while (needed_words_remaining) { @@ -438,7 +446,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( StoreResult(instr.result, DxbcSrc::R(system_temp_result_)); } -DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV( +uint32_t DxbcShaderTranslator::FindOrAddTextureBinding( uint32_t fetch_constant, TextureDimension dimension, bool is_signed) { // 1D and 2D textures (including stacked ones) are treated as 2D arrays for // binding and coordinate simplicity. @@ -446,47 +454,52 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV( dimension = TextureDimension::k2D; } uint32_t srv_index = UINT32_MAX; - for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { - TextureSRV& texture_srv = texture_srvs_[i]; - if (texture_srv.fetch_constant == fetch_constant && - texture_srv.dimension == dimension && - texture_srv.is_signed == is_signed) { - srv_index = i; + for (uint32_t i = 0; i < uint32_t(texture_bindings_.size()); ++i) { + TextureBinding& texture_binding = texture_bindings_[i]; + if (texture_binding.fetch_constant == fetch_constant && + texture_binding.dimension == dimension && + texture_binding.is_signed == is_signed) { + return i; + } + } + if (texture_bindings_.size() >= kMaxTextureBindings) { + assert_always(); + return kMaxTextureBindings - 1; + } + uint32_t texture_binding_index = uint32_t(texture_bindings_.size()); + TextureBinding new_texture_binding; + if (!bindless_resources_used_) { + new_texture_binding.bindful_srv_index = srv_count_++; + texture_bindings_for_bindful_srv_indices_.insert( + {new_texture_binding.bindful_srv_index, texture_binding_index}); + } else { + new_texture_binding.bindful_srv_index = kBindingIndexUnallocated; + } + 
new_texture_binding.bindful_srv_rdef_name_offset = 0; + // Consistently 0 if not bindless as it may be used for hashing. + new_texture_binding.bindless_descriptor_index = + bindless_resources_used_ ? GetBindlessResourceCount() : 0; + new_texture_binding.fetch_constant = fetch_constant; + new_texture_binding.dimension = dimension; + new_texture_binding.is_signed = is_signed; + const char* dimension_name; + switch (dimension) { + case TextureDimension::k3D: + dimension_name = "3d"; break; - } + case TextureDimension::kCube: + dimension_name = "cube"; + break; + default: + dimension_name = "2d"; } - if (srv_index == UINT32_MAX) { - if (texture_srvs_.size() >= kMaxTextureSRVs) { - assert_always(); - srv_index = kMaxTextureSRVs - 1; - } else { - TextureSRV new_texture_srv; - new_texture_srv.fetch_constant = fetch_constant; - new_texture_srv.dimension = dimension; - new_texture_srv.is_signed = is_signed; - const char* dimension_name; - switch (dimension) { - case TextureDimension::k3D: - dimension_name = "3d"; - break; - case TextureDimension::kCube: - dimension_name = "cube"; - break; - default: - dimension_name = "2d"; - } - new_texture_srv.name = fmt::format("xe_texture{}_{}_{}", fetch_constant, + new_texture_binding.name = fmt::format("xe_texture{}_{}_{}", fetch_constant, dimension_name, is_signed ? 's' : 'u'); - srv_index = uint32_t(texture_srvs_.size()); - texture_srvs_.emplace_back(std::move(new_texture_srv)); - } - } - // T0 is shared memory. - return DxbcSrc::T(1 + srv_index, - uint32_t(SRVMainRegister::kBoundTexturesStart) + srv_index); + texture_bindings_.emplace_back(std::move(new_texture_binding)); + return texture_binding_index; } -DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding( +uint32_t DxbcShaderTranslator::FindOrAddSamplerBinding( uint32_t fetch_constant, TextureFilter mag_filter, TextureFilter min_filter, TextureFilter mip_filter, AnisoFilter aniso_filter) { // In Direct3D 12, anisotropic filtering implies linear filtering. 
@@ -505,43 +518,42 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding( sampler_binding.min_filter == min_filter && sampler_binding.mip_filter == mip_filter && sampler_binding.aniso_filter == aniso_filter) { - sampler_index = i; - break; + return i; } } - if (sampler_index == UINT32_MAX) { - if (sampler_bindings_.size() >= kMaxSamplerBindings) { - assert_always(); - sampler_index = kMaxSamplerBindings - 1; + if (sampler_bindings_.size() >= kMaxSamplerBindings) { + assert_always(); + return kMaxSamplerBindings - 1; + } + std::ostringstream name; + name << "xe_sampler" << fetch_constant; + if (aniso_filter != AnisoFilter::kUseFetchConst) { + if (aniso_filter == AnisoFilter::kDisabled) { + name << "_a0"; } else { - std::ostringstream name; - name << "xe_sampler" << fetch_constant; - if (aniso_filter != AnisoFilter::kUseFetchConst) { - if (aniso_filter == AnisoFilter::kDisabled) { - name << "_a0"; - } else { - name << "_a" << (1u << (uint32_t(aniso_filter) - 1)); - } - } - if (aniso_filter == AnisoFilter::kDisabled || - aniso_filter == AnisoFilter::kUseFetchConst) { - static const char* kFilterSuffixes[] = {"p", "l", "b", "f"}; - name << "_" << kFilterSuffixes[uint32_t(mag_filter)] - << kFilterSuffixes[uint32_t(min_filter)] - << kFilterSuffixes[uint32_t(mip_filter)]; - } - SamplerBinding new_sampler_binding; - new_sampler_binding.fetch_constant = fetch_constant; - new_sampler_binding.mag_filter = mag_filter; - new_sampler_binding.min_filter = min_filter; - new_sampler_binding.mip_filter = mip_filter; - new_sampler_binding.aniso_filter = aniso_filter; - new_sampler_binding.name = name.str(); - sampler_index = uint32_t(sampler_bindings_.size()); - sampler_bindings_.emplace_back(std::move(new_sampler_binding)); + name << "_a" << (1u << (uint32_t(aniso_filter) - 1)); } } - return DxbcSrc::S(sampler_index, sampler_index); + if (aniso_filter == AnisoFilter::kDisabled || + aniso_filter == AnisoFilter::kUseFetchConst) { + static const char* 
kFilterSuffixes[] = {"p", "l", "b", "f"}; + name << "_" << kFilterSuffixes[uint32_t(mag_filter)] + << kFilterSuffixes[uint32_t(min_filter)] + << kFilterSuffixes[uint32_t(mip_filter)]; + } + SamplerBinding new_sampler_binding; + // Consistently 0 if not bindless as it may be used for hashing. + new_sampler_binding.bindless_descriptor_index = + bindless_resources_used_ ? GetBindlessResourceCount() : 0; + new_sampler_binding.fetch_constant = fetch_constant; + new_sampler_binding.mag_filter = mag_filter; + new_sampler_binding.min_filter = min_filter; + new_sampler_binding.mip_filter = mip_filter; + new_sampler_binding.aniso_filter = aniso_filter; + new_sampler_binding.name = name.str(); + uint32_t sampler_binding_index = uint32_t(sampler_bindings_.size()); + sampler_bindings_.emplace_back(std::move(new_sampler_binding)); + return sampler_binding_index; } void DxbcShaderTranslator::ProcessTextureFetchInstruction( @@ -893,7 +905,6 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( LoadOperand(instr.operands[0], used_result_nonzero_components, coord_operand_temp_pushed); DxbcSrc coord_src(coord_operand); - uint32_t coord_temp = UINT32_MAX; uint32_t offsets_needed = offsets_not_zero & used_result_nonzero_components; if (!instr.attributes.unnormalized_coordinates || offsets_needed) { // Using system_temp_result_ as a temporary for coordinate denormalization @@ -948,7 +959,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // - 1D, 2D array - need to be padded to 2D array coordinates. // - 3D - Z needs to be unnormalized for stacked and normalized for 3D. // - Cube - coordinates need to be transformed into the cube space. - uint32_t coord_temp = PushSystemTemp(); + // Bindless sampler index will be loaded to W after loading the coordinates + // (so W can be used as a temporary for coordinate loading). 
+ uint32_t coord_and_sampler_temp = PushSystemTemp(); // Need normalized coordinates (except for Z - keep it as is, will be // converted later according to whether the texture is 3D). For cube maps, @@ -978,51 +991,54 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( normalized_components); if (offsets_not_zero & normalized_components) { // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched. - DxbcOpAdd(DxbcDest::R(coord_temp, normalized_components), coord_operand, - DxbcSrc::LP(offsets)); + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, normalized_components), + coord_operand, DxbcSrc::LP(offsets)); assert_not_zero(normalized_components & 0b011); - DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components & 0b011), - DxbcSrc::R(coord_temp), DxbcSrc::R(size_and_is_3d_temp)); + DxbcOpDiv( + DxbcDest::R(coord_and_sampler_temp, normalized_components & 0b011), + DxbcSrc::R(coord_and_sampler_temp), + DxbcSrc::R(size_and_is_3d_temp)); if (instr.dimension == TextureDimension::k3D) { // Normalize if 3D. assert_true((size_needed_components & 0b1100) == 0b1100); DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); - DxbcOpDiv(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), + DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); DxbcOpEndIf(); } } else { - DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components), coord_operand, - DxbcSrc::R(size_and_is_3d_temp)); + DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, normalized_components), + coord_operand, DxbcSrc::R(size_and_is_3d_temp)); if (instr.dimension == TextureDimension::k3D) { // Don't normalize if stacked. 
assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100), + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), coord_operand.SelectFromSwizzled(2)); } } } else { // Normalized coordinates - apply offsets to XY or copy them to - // coord_temp, and if stacked, denormalize Z. + // coord_and_sampler_temp, and if stacked, denormalize Z. uint32_t coords_with_offset = offsets_not_zero & normalized_components; if (coords_with_offset) { // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched. assert_true((size_needed_components & coords_with_offset) == coords_with_offset); - DxbcOpDiv(DxbcDest::R(coord_temp, coords_with_offset), + DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, coords_with_offset), DxbcSrc::LP(offsets), DxbcSrc::R(size_and_is_3d_temp)); - DxbcOpAdd(DxbcDest::R(coord_temp, coords_with_offset), coord_operand, - DxbcSrc::R(coord_temp)); + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, coords_with_offset), + coord_operand, DxbcSrc::R(coord_and_sampler_temp)); } uint32_t coords_without_offset = ~coords_with_offset & normalized_components; // 3D/stacked without offset is handled separately. if (coords_without_offset & 0b011) { - DxbcOpMov(DxbcDest::R(coord_temp, coords_without_offset & 0b011), - coord_operand); + DxbcOpMov( + DxbcDest::R(coord_and_sampler_temp, coords_without_offset & 0b011), + coord_operand); } if (instr.dimension == TextureDimension::k3D) { assert_true((size_needed_components & 0b1100) == 0b1100); @@ -1030,73 +1046,79 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Denormalize and offset Z (re-apply the offset not to lose precision // as a result of division) if stacked. 
DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); - DxbcOpMAd(DxbcDest::R(coord_temp, 0b0100), + DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0100), coord_operand.SelectFromSwizzled(2), DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(offsets[2])); DxbcOpEndIf(); } else { // Denormalize Z if stacked, and revert to normalized if 3D. - DxbcOpMul(DxbcDest::R(coord_temp, 0b0100), + DxbcOpMul(DxbcDest::R(coord_and_sampler_temp, 0b0100), coord_operand.SelectFromSwizzled(2), DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100), + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW), coord_operand.SelectFromSwizzled(2), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); } } } switch (instr.dimension) { case TextureDimension::k1D: // Pad to 2D array coordinates. - DxbcOpMov(DxbcDest::R(coord_temp, 0b0110), DxbcSrc::LF(0.0f)); + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0110), + DxbcSrc::LF(0.0f)); break; case TextureDimension::k2D: // Pad to 2D array coordinates. - DxbcOpMov(DxbcDest::R(coord_temp, 0b0100), DxbcSrc::LF(0.0f)); + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::LF(0.0f)); break; case TextureDimension::kCube: { // Transform from the major axis SC/TC plus 1 into cube coordinates. // Move SC/TC from 1...2 to -1...1. - DxbcOpMAd(DxbcDest::R(coord_temp, 0b0011), DxbcSrc::R(coord_temp), - DxbcSrc::LF(2.0f), DxbcSrc::LF(-3.0f)); + DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0011), + DxbcSrc::R(coord_and_sampler_temp), DxbcSrc::LF(2.0f), + DxbcSrc::LF(-3.0f)); // Get the face index (floored, within 0...5) as an integer to - // coord_temp.z. + // coord_and_sampler_temp.z. 
if (offsets[2]) { - DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100), + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), coord_operand.SelectFromSwizzled(2), DxbcSrc::LF(offsets[2])); - DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); } else { - DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100), + DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100), coord_operand.SelectFromSwizzled(2)); } - DxbcOpUMin(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LU(5)); + DxbcOpUMin(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), + DxbcSrc::LU(5)); // Split the face index into axis and sign (0 - positive, 1 - negative) - // to coord_temp.zw (sign in W so it won't be overwritten). - DxbcOpUBFE(DxbcDest::R(coord_temp, 0b1100), DxbcSrc::LU(0, 0, 2, 1), - DxbcSrc::LU(0, 0, 1, 0), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + // to coord_and_sampler_temp.zw (sign in W so it won't be overwritten). + // Fine to overwrite W at this point, the sampler index hasn't been + // loaded yet. + DxbcOpUBFE(DxbcDest::R(coord_and_sampler_temp, 0b1100), + DxbcSrc::LU(0, 0, 2, 1), DxbcSrc::LU(0, 0, 1, 0), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); // Remap the axes in a way opposite to the ALU cube instruction. - DxbcOpSwitch(DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcOpSwitch(DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); DxbcOpCase(DxbcSrc::LU(0)); { // X is the major axis. // Y = -TC (TC overwritten). - DxbcOpMov(DxbcDest::R(coord_temp, 0b0010), - -DxbcSrc::R(coord_temp, DxbcSrc::kYYYY)); + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010), + -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); // Z = neg ? SC : -SC. 
- DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), - DxbcSrc::R(coord_temp, DxbcSrc::kXXXX), - -DxbcSrc::R(coord_temp, DxbcSrc::kXXXX)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX), + -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX)); // X = neg ? -1 : 1 (SC overwritten). - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f), - DxbcSrc::LF(1.0f)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); } DxbcOpBreak(); DxbcOpCase(DxbcSrc::LU(1)); @@ -1104,31 +1126,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Y is the major axis. // X = SC (already there). // Z = neg ? -TC : TC. - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), - -DxbcSrc::R(coord_temp, DxbcSrc::kYYYY), - DxbcSrc::R(coord_temp, DxbcSrc::kYYYY)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); // Y = neg ? -1 : 1 (TC overwritten). - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0010), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f), - DxbcSrc::LF(1.0f)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0010), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); } DxbcOpBreak(); DxbcOpDefault(); { // Z is the major axis. // X = neg ? -SC : SC (SC overwritten). 
- DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), - -DxbcSrc::R(coord_temp, DxbcSrc::kXXXX), - DxbcSrc::R(coord_temp, DxbcSrc::kXXXX)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX)); // Y = -TC (TC overwritten). - DxbcOpMov(DxbcDest::R(coord_temp, 0b0010), - -DxbcSrc::R(coord_temp, DxbcSrc::kYYYY)); + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010), + -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); // Z = neg ? -1 : 1. - DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f), - DxbcSrc::LF(1.0f)); + DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), + DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); } DxbcOpBreak(); DxbcOpEndSwitch(); @@ -1145,10 +1167,26 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // since the return value can be used with bias later, forcing linear mip // filtering (the XNA assembler also doesn't accept MipFilter overrides // for getCompTexLOD). - DxbcSrc sampler(FindOrAddSamplerBinding( + uint32_t sampler_binding_index = FindOrAddSamplerBinding( tfetch_index, instr.attributes.mag_filter, instr.attributes.min_filter, TextureFilter::kLinear, - instr.attributes.aniso_filter)); + instr.attributes.aniso_filter); + DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index)); + if (bindless_resources_used_) { + // Load the sampler index to coord_and_sampler_temp.w and use relative + // sampler indexing. 
+ if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) { + cbuffer_index_descriptor_indices_ = cbuffer_count_++; + } + uint32_t sampler_bindless_descriptor_index = + sampler_bindings_[sampler_binding_index].bindless_descriptor_index; + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + sampler_bindless_descriptor_index >> 2) + .Select(sampler_bindless_descriptor_index & 3)); + sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3)); + } // Check which SRV needs to be accessed - signed or unsigned. If there is // at least one non-signed component, will be using the unsigned one. uint32_t is_unsigned_temp = PushSystemTemp(); @@ -1158,13 +1196,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( DxbcOpINE(DxbcDest::R(is_unsigned_temp, 0b0001), DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX), DxbcSrc::LU(uint32_t(TextureSign::kSigned) * 0b01010101)); - DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX)); - // Release is_unsigned_temp. - PopSystemTemp(); - for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) { - if (is_signed) { - DxbcOpElse(); - } + if (bindless_resources_used_) { + // Bindless path - select the SRV index between unsigned and signed to + // query. if (instr.dimension == TextureDimension::k3D) { // Check if 3D. assert_true((size_needed_components & 0b1000) == 0b1000); @@ -1173,37 +1207,119 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( for (uint32_t is_stacked = 0; is_stacked < (instr.dimension == TextureDimension::k3D ? 2u : 1u); ++is_stacked) { + TextureDimension srv_dimension = instr.dimension; if (is_stacked) { + srv_dimension = TextureDimension::k2D; DxbcOpElse(); } - // Always 3 coordinate components (1D and 2D are padded to 2D arrays, - // 3D and cube have 3 coordinate dimensions). 
Not caring about - // normalization of the array layer because it doesn't participate in - // LOD calculation in Direct3D 12. + uint32_t texture_binding_index_unsigned = + FindOrAddTextureBinding(tfetch_index, srv_dimension, false); + uint32_t texture_binding_index_signed = + FindOrAddTextureBinding(tfetch_index, srv_dimension, true); + uint32_t texture_bindless_descriptor_index_unsigned = + texture_bindings_[texture_binding_index_unsigned] + .bindless_descriptor_index; + uint32_t texture_bindless_descriptor_index_signed = + texture_bindings_[texture_binding_index_signed] + .bindless_descriptor_index; + if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) { + cbuffer_index_descriptor_indices_ = cbuffer_count_++; + } + DxbcOpMovC( + DxbcDest::R(is_unsigned_temp, 0b0001), + DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index_unsigned >> 2) + .Select(texture_bindless_descriptor_index_unsigned & 3), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index_signed >> 2) + .Select(texture_bindless_descriptor_index_signed & 3)); + // Always 3 coordinate components (1D and 2D are padded to 2D + // arrays, 3D and cube have 3 coordinate dimensions). Not caring + // about normalization of the array layer because it doesn't + // participate in LOD calculation in Direct3D 12. // The `lod` instruction returns the unclamped LOD (probably need // unclamped so it can be biased back into the range later) in the Y // component, and the resource swizzle is the return value swizzle. // FIXME(Triang3l): Gradient exponent adjustment from the fetch - // constant needs to be applied here, would require SV_Position.xy & 1 - // math, replacing coordinates for one pixel with 0 and for another - // with the adjusted gradient, but possibly not used by any games. 
+ // constant needs to be applied here, would require math involving + // SV_Position parity, replacing coordinates for one pixel with 0 + // and for another with the adjusted gradient, but possibly not used + // by any games. assert_true(used_result_nonzero_components == 0b0001); + uint32_t* bindless_srv_index = nullptr; + switch (srv_dimension) { + case TextureDimension::k1D: + case TextureDimension::k2D: + bindless_srv_index = &srv_index_bindless_textures_2d_; + break; + case TextureDimension::k3D: + bindless_srv_index = &srv_index_bindless_textures_3d_; + break; + case TextureDimension::kCube: + bindless_srv_index = &srv_index_bindless_textures_cube_; + break; + } + assert_not_null(bindless_srv_index); + if (*bindless_srv_index == kBindingIndexUnallocated) { + *bindless_srv_index = srv_count_++; + } DxbcOpLOD(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(coord_temp), 3, - FindOrAddTextureSRV( - tfetch_index, - is_stacked ? TextureDimension::k2D : instr.dimension, - is_signed != 0) - .Select(1), + DxbcSrc::R(coord_and_sampler_temp), 3, + DxbcSrc::T(*bindless_srv_index, + DxbcIndex(is_unsigned_temp, 0), DxbcSrc::kYYYY), sampler); } if (instr.dimension == TextureDimension::k3D) { // Close the 3D/stacked check. DxbcOpEndIf(); } + } else { + // Bindful path - conditionally query one of the SRVs. + DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX)); + for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) { + if (is_signed) { + DxbcOpElse(); + } + if (instr.dimension == TextureDimension::k3D) { + // Check if 3D. + assert_true((size_needed_components & 0b1000) == 0b1000); + DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + } + for (uint32_t is_stacked = 0; + is_stacked < + (instr.dimension == TextureDimension::k3D ? 2u : 1u); + ++is_stacked) { + if (is_stacked) { + DxbcOpElse(); + } + assert_true(used_result_nonzero_components == 0b0001); + uint32_t texture_binding_index = FindOrAddTextureBinding( + tfetch_index, + is_stacked ? 
TextureDimension::k2D : instr.dimension, + is_signed != 0); + DxbcOpLOD( + DxbcDest::R(system_temp_result_, 0b0001), + DxbcSrc::R(coord_and_sampler_temp), 3, + DxbcSrc::T( + texture_bindings_[texture_binding_index].bindful_srv_index, + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index, + DxbcSrc::kYYYY), + sampler); + } + if (instr.dimension == TextureDimension::k3D) { + // Close the 3D/stacked check. + DxbcOpEndIf(); + } + } + // Close the signedness check. + DxbcOpEndIf(); } - // Close the signedness check. - DxbcOpEndIf(); + // Release is_unsigned_temp. + PopSystemTemp(); } else { // - Gradients or LOD to be passed to the sample_d/sample_l. @@ -1322,11 +1438,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } else { // Coarse is according to the Direct3D 11.3 specification. DxbcOpDerivRTXCoarse(DxbcDest::R(grad_h_lod_temp, grad_mask), - DxbcSrc::R(coord_temp)); + DxbcSrc::R(coord_and_sampler_temp)); DxbcOpMul(DxbcDest::R(grad_h_lod_temp, grad_mask), DxbcSrc::R(grad_h_lod_temp), lod_src); DxbcOpDerivRTYCoarse(DxbcDest::R(grad_v_temp, grad_mask), - DxbcSrc::R(coord_temp)); + DxbcSrc::R(coord_and_sampler_temp)); // FIXME(Triang3l): Gradient exponent adjustment is currently not // done in getCompTexLOD, so don't do it here too. #if 0 @@ -1357,11 +1473,27 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // doesn't allow mixing anisotropic and point filtering. Possibly // anistropic filtering should be disabled when explicit LOD is used - do // this here. - DxbcSrc sampler(FindOrAddSamplerBinding( + uint32_t sampler_binding_index = FindOrAddSamplerBinding( tfetch_index, instr.attributes.mag_filter, instr.attributes.min_filter, instr.attributes.mip_filter, use_computed_lod ? 
instr.attributes.aniso_filter - : AnisoFilter::kDisabled)); + : AnisoFilter::kDisabled); + DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index)); + if (bindless_resources_used_) { + // Load the sampler index to coord_and_sampler_temp.w and use relative + // sampler indexing. + if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) { + cbuffer_index_descriptor_indices_ = cbuffer_count_++; + } + uint32_t sampler_bindless_descriptor_index = + sampler_bindings_[sampler_binding_index].bindless_descriptor_index; + DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + sampler_bindless_descriptor_index >> 2) + .Select(sampler_bindless_descriptor_index & 3)); + sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3)); + } // Break result register dependencies because textures will be sampled // conditionally, including the primary signs. @@ -1389,9 +1521,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // - srv_selection_temp.z - if stacked and not forced to be point-sampled, // the lerp factor between two layers, wrapped by layer_lerp_factor_src // with l(0.0) fallback for the point sampling case. - // - srv_selection_temp.w - scratch for calculations involving these. + // - srv_selection_temp.w - first, scratch for calculations involving + // these, then, unsigned or signed SRV descriptor index. DxbcSrc layer_lerp_factor_src(DxbcSrc::LF(0.0f)); - uint32_t srv_selection_temp = UINT32_MAX; + // W is always needed for bindless. + uint32_t srv_selection_temp = + bindless_resources_used_ ?
PushSystemTemp() : UINT32_MAX; if (instr.dimension == TextureDimension::k3D) { bool vol_mag_filter_is_fetch_const = instr.attributes.vol_mag_filter == TextureFilter::kUseFetchConst; @@ -1469,10 +1604,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } // For linear filtering, subtract 0.5 from the coordinates and store // the lerp factor. Flooring will be done later. - DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(-0.5f)); + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), + DxbcSrc::LF(-0.5f)); DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); // Close the linear check. DxbcOpEndIf(); // Close the stacked check. @@ -1505,11 +1641,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } // For linear filtering, subtract 0.5 from the coordinates and store // the lerp factor. Flooring will be done later. - DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(-0.5f)); DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); if (vol_mag_filter_is_fetch_const) { // Close the fetch constant linear filtering mode check. DxbcOpEndIf(); @@ -1578,13 +1714,50 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // for the layer index, but on the Xbox 360, addressing is similar to // that of 3D textures). This is needed for both point and linear // filtering (with linear, 0.5 was subtracted previously). 
- DxbcOpRoundNI(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ)); + DxbcOpRoundNI(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + } + uint32_t texture_binding_index_unsigned = + FindOrAddTextureBinding(tfetch_index, srv_dimension, false); + const TextureBinding& texture_binding_unsigned = + texture_bindings_[texture_binding_index_unsigned]; + uint32_t texture_binding_index_signed = + FindOrAddTextureBinding(tfetch_index, srv_dimension, true); + const TextureBinding& texture_binding_signed = + texture_bindings_[texture_binding_index_signed]; + DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f)); + if (bindless_resources_used_) { + uint32_t* bindless_srv_index = nullptr; + switch (srv_dimension) { + case TextureDimension::k1D: + case TextureDimension::k2D: + bindless_srv_index = &srv_index_bindless_textures_2d_; + break; + case TextureDimension::k3D: + bindless_srv_index = &srv_index_bindless_textures_3d_; + break; + case TextureDimension::kCube: + bindless_srv_index = &srv_index_bindless_textures_cube_; + break; + } + assert_not_null(bindless_srv_index); + if (*bindless_srv_index == kBindingIndexUnallocated) { + *bindless_srv_index = srv_count_++; + } + assert_true(srv_selection_temp != UINT32_MAX); + srv_unsigned = + DxbcSrc::T(*bindless_srv_index, DxbcIndex(srv_selection_temp, 3)); + srv_signed = srv_unsigned; + } else { + srv_unsigned = + DxbcSrc::T(texture_binding_unsigned.bindful_srv_index, + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index_unsigned); + srv_signed = + DxbcSrc::T(texture_binding_signed.bindful_srv_index, + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index_signed); } - DxbcSrc srv_unsigned( - FindOrAddTextureSRV(tfetch_index, srv_dimension, false)); - DxbcSrc srv_signed( - FindOrAddTextureSRV(tfetch_index, srv_dimension, true)); for (uint32_t layer = 0; layer < (layer_lerp_needed ? 
2u : 1u); ++layer) { uint32_t layer_value_temp = system_temp_result_; @@ -1596,8 +1769,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // If the lerp factor is not zero, sample the next layer. DxbcOpIf(true, DxbcSrc::R(layer_value_temp, DxbcSrc::kXXXX)); // Go to the next layer. - DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100), - DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), + DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), + DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(1.0f)); } // Always 3 coordinate components (1D and 2D are padded to 2D arrays, @@ -1605,17 +1778,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( DxbcOpIf(false, is_all_signed_src); { // Sample the unsigned texture. + if (bindless_resources_used_) { + // Load the unsigned texture descriptor index. + assert_true(srv_selection_temp != UINT32_MAX); + if (cbuffer_index_descriptor_indices_ == + kBindingIndexUnallocated) { + cbuffer_index_descriptor_indices_ = cbuffer_count_++; + } + uint32_t texture_bindless_descriptor_index = + texture_binding_unsigned.bindless_descriptor_index; + DxbcOpMov( + DxbcDest::R(srv_selection_temp, 0b1000), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index >> 2) + .Select(texture_bindless_descriptor_index & 3)); + } if (grad_v_temp != UINT32_MAX) { assert_not_zero(grad_component_count); DxbcOpSampleD( DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler, + DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler, DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp), srv_grad_component_count); } else { DxbcOpSampleL( DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler, lod_src); + DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler, + lod_src); } } DxbcOpEndIf(); @@ -1623,17 +1813,34 @@ void 
DxbcShaderTranslator::ProcessTextureFetchInstruction( { // Sample the signed texture. uint32_t signed_temp = PushSystemTemp(); + if (bindless_resources_used_) { + // Load the signed texture descriptor index. + assert_true(srv_selection_temp != UINT32_MAX); + if (cbuffer_index_descriptor_indices_ == + kBindingIndexUnallocated) { + cbuffer_index_descriptor_indices_ = cbuffer_count_++; + } + uint32_t texture_bindless_descriptor_index = + texture_binding_signed.bindless_descriptor_index; + DxbcOpMov( + DxbcDest::R(srv_selection_temp, 0b1000), + DxbcSrc::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index >> 2) + .Select(texture_bindless_descriptor_index & 3)); + } if (grad_v_temp != UINT32_MAX) { assert_not_zero(grad_component_count); DxbcOpSampleD( DxbcDest::R(signed_temp, used_result_nonzero_components), - DxbcSrc::R(coord_temp), 3, srv_signed, sampler, + DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler, DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp), srv_grad_component_count); } else { DxbcOpSampleL( DxbcDest::R(signed_temp, used_result_nonzero_components), - DxbcSrc::R(coord_temp), 3, srv_signed, sampler, lod_src); + DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler, + lod_src); } DxbcOpMovC( DxbcDest::R(layer_value_temp, used_result_nonzero_components), @@ -1680,7 +1887,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } } - // Release coord_temp. + // Release coord_and_sampler_temp. 
PopSystemTemp(); // Apply the bias and gamma correction (gamma is after filtering here, diff --git a/src/xenia/gpu/dxbc_shader_translator_memexport.cc b/src/xenia/gpu/dxbc_shader_translator_memexport.cc index 330c5e433..359ce8741 100644 --- a/src/xenia/gpu/dxbc_shader_translator_memexport.cc +++ b/src/xenia/gpu/dxbc_shader_translator_memexport.cc @@ -435,8 +435,12 @@ void DxbcShaderTranslator::ExportToMemory() { DxbcOpSwitch(element_size_src); for (uint32_t k = 1; k <= 4; k <<= 1) { DxbcOpCase(DxbcSrc::LU(k * 4)); + if (uav_index_shared_memory_ == kBindingIndexUnallocated) { + uav_index_shared_memory_ = uav_count_++; + } DxbcOpStoreRaw( - DxbcDest::U(0, uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1), + DxbcDest::U(uav_index_shared_memory_, + uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1), address_src, eM_src); DxbcOpBreak(); } diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 297b3b9ab..f709ff794 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -1575,8 +1575,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { DxbcOpIf(true, temp_x_src); { // Write the new depth/stencil. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpStoreUAVTyped( - DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)), + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i)); } @@ -1955,10 +1958,13 @@ void DxbcShaderTranslator:: // Load the old depth/stencil value to VGPR [0].z. 
// VGPR [0].x = new depth // VGPR [0].z = old depth/stencil + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpLdUAVTyped(DxbcDest::R(system_temps_subroutine_, 0b0100), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - DxbcSrc::U(ROV_GetEDRAMUAVIndex(), - uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX)); + DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM), + DxbcSrc::kXXXX)); // Extract the old depth part to VGPR [0].w. // VGPR [0].x = new depth // VGPR [0].z = old depth/stencil @@ -2398,8 +2404,11 @@ void DxbcShaderTranslator:: // Write the new depth/stencil. // VGPR [0].x = new depth/stencil // VGPR [0].y = depth/stencil test failure + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpStoreUAVTyped( - DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)), + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); if (depth_stencil_early) { @@ -2499,10 +2508,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( // Load the lower 32 bits of the 64bpp color to VGPR [0].z. // VGPRs [0].xy - packed source color/alpha if not blending. // VGPR [0].z - lower 32 bits of the packed color. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpLdUAVTyped( DxbcDest::R(system_temps_subroutine_, 0b0100), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1, - DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM), + DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX)); // Get the address of the upper 32 bits of the color to VGPR [0].w. // VGPRs [0].xy - packed source color/alpha if not blending. 
@@ -2514,10 +2526,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( // Load the upper 32 bits of the 64bpp color to VGPR [0].w. // VGPRs [0].xy - packed source color/alpha if not blending. // VGPRs [0].zw - packed destination color/alpha. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpLdUAVTyped( DxbcDest::R(system_temps_subroutine_, 0b1000), DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kWWWW), 1, - DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM), + DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX)); } // The color is 32bpp. @@ -2526,10 +2541,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( // Load the 32bpp color to VGPR [0].z. // VGPRs [0].xy - packed source color/alpha if not blending. // VGPR [0].z - packed 32bpp destination color. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpLdUAVTyped( DxbcDest::R(system_temps_subroutine_, 0b0100), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1, - DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM), + DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX)); // Break register dependency in VGPR [0].w if the color is 32bpp. // VGPRs [0].xy - packed source color/alpha if not blending. @@ -3276,8 +3294,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( DxbcOpIf(true, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ)); { // Store the lower 32 bits of the 64bpp color. 
+ if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpStoreUAVTyped( - DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)), + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); // Get the address of the upper 32 bits of the color to VGPR [0].z (can't @@ -3289,8 +3310,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), DxbcSrc::LU(1)); // Store the upper 32 bits of the 64bpp color. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpStoreUAVTyped( - DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)), + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)), DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ), 1, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY)); } @@ -3298,8 +3322,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine( DxbcOpElse(); { // Store the 32bpp color. 
+ if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } DxbcOpStoreUAVTyped( - DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)), + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); } diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index d5c8dd51e..5cdbcb61f 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -41,6 +41,8 @@ DEFINE_string( "[vertex or unspecified, linedomaincp, linedomainpatch, triangledomaincp, " "triangledomainpatch, quaddomaincp, quaddomainpatch].", "GPU"); +DEFINE_bool(shader_output_bindless_resources, false, + "Output host shader with bindless resources used.", "GPU"); DEFINE_bool(shader_output_dxbc_rov, false, "Output ROV-based output-merger code in DXBC pixel shaders.", "GPU"); @@ -109,7 +111,8 @@ int shader_compiler_main(const std::vector& args) { } else if (cvars::shader_output_type == "dxbc" || cvars::shader_output_type == "dxbctext") { translator = std::make_unique( - 0, cvars::shader_output_dxbc_rov); + 0, cvars::shader_output_bindless_resources, + cvars::shader_output_dxbc_rov); } else { translator = std::make_unique(); } diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 80da7576f..413c6d421 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -327,19 +327,22 @@ bool D3D12Provider::Initialize() { // Check if optional features are supported. 
rasterizer_ordered_views_supported_ = false; - tiled_resources_tier_ = 0; + resource_binding_tier_ = D3D12_RESOURCE_BINDING_TIER_1; + tiled_resources_tier_ = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED; D3D12_FEATURE_DATA_D3D12_OPTIONS options; if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)))) { rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false; - tiled_resources_tier_ = uint32_t(options.TiledResourcesTier); + resource_binding_tier_ = options.ResourceBindingTier; + tiled_resources_tier_ = options.TiledResourcesTier; } - programmable_sample_positions_tier_ = 0; + programmable_sample_positions_tier_ = + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED; D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2; if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2)))) { programmable_sample_positions_tier_ = - uint32_t(options2.ProgrammableSamplePositionsTier); + options2.ProgrammableSamplePositionsTier; } virtual_address_bits_per_resource_ = 0; D3D12_FEATURE_DATA_GPU_VIRTUAL_ADDRESS_SUPPORT virtual_address_support; @@ -349,14 +352,17 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } - XELOGD3D("Direct3D 12 device features:"); - XELOGD3D("* Max GPU virtual address bits per resource: {}", - virtual_address_bits_per_resource_); - XELOGD3D("* Programmable sample positions: tier {}", - programmable_sample_positions_tier_); - XELOGD3D("* Rasterizer-ordered views: {}", - rasterizer_ordered_views_supported_ ? 
"yes" : "no"); - XELOGD3D("* Tiled resources: tier {}", tiled_resources_tier_); + XELOGD3D( + "Direct3D 12 device features:\n" + "Max GPU virtual address bits per resource: {}\n" + "Programmable sample positions: tier {}\n" + "Rasterizer-ordered views: {}\n" + "Resource binding: tier {}\n" + "Tiled resources: tier {}\n", + virtual_address_bits_per_resource_, + uint32_t(programmable_sample_positions_tier_), + rasterizer_ordered_views_supported_ ? "yes" : "no", + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); // Get the graphics analysis interface, will silently fail if PIX is not // attached. diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index fed9ec8fd..0e79f56bb 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -68,13 +68,19 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; } // Device features. - uint32_t GetProgrammableSamplePositionsTier() const { + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER + GetProgrammableSamplePositionsTier() const { return programmable_sample_positions_tier_; } bool AreRasterizerOrderedViewsSupported() const { return rasterizer_ordered_views_supported_; } - uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; } + D3D12_RESOURCE_BINDING_TIER GetResourceBindingTier() const { + return resource_binding_tier_; + } + D3D12_TILED_RESOURCES_TIER GetTiledResourcesTier() const { + return tiled_resources_tier_; + } uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } @@ -128,9 +134,10 @@ class D3D12Provider : public GraphicsProvider { uint32_t adapter_vendor_id_; - uint32_t programmable_sample_positions_tier_; + D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER programmable_sample_positions_tier_; bool rasterizer_ordered_views_supported_; - uint32_t tiled_resources_tier_; + D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; + 
D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; }; diff --git a/src/xenia/ui/d3d12/d3d12_util.h b/src/xenia/ui/d3d12/d3d12_util.h index 947ab9c44..68c440c40 100644 --- a/src/xenia/ui/d3d12/d3d12_util.h +++ b/src/xenia/ui/d3d12/d3d12_util.h @@ -10,6 +10,8 @@ #ifndef XENIA_UI_D3D12_D3D12_UTIL_H_ #define XENIA_UI_D3D12_D3D12_UTIL_H_ +#include <utility> + #include "xenia/ui/d3d12/d3d12_provider.h" namespace xe { @@ -17,6 +19,9 @@ namespace ui { namespace d3d12 { namespace util { +using DescriptorCPUGPUHandlePair = + std::pair<D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_GPU_DESCRIPTOR_HANDLE>; + extern const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault; extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload; extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;