From 35aaa7272273463e1836928aa330024ea8228422 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 3 Aug 2018 16:39:13 +0300 Subject: [PATCH] [D3D12] SRV and sampler bindings --- .../gpu/d3d12/d3d12_command_processor.cc | 362 +++++++++++++----- src/xenia/gpu/d3d12/d3d12_command_processor.h | 50 +-- src/xenia/gpu/d3d12/d3d12_shader.cc | 21 + src/xenia/gpu/d3d12/d3d12_shader.h | 29 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 10 +- src/xenia/gpu/d3d12/pipeline_cache.h | 4 +- src/xenia/gpu/d3d12/texture_cache.cc | 87 +++++ src/xenia/gpu/d3d12/texture_cache.h | 72 ++++ src/xenia/gpu/hlsl_shader_translator.cc | 74 +++- src/xenia/gpu/hlsl_shader_translator.h | 33 +- 10 files changed, 588 insertions(+), 154 deletions(-) create mode 100644 src/xenia/gpu/d3d12/texture_cache.cc create mode 100644 src/xenia/gpu/d3d12/texture_cache.h diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 19142a387..8c7899ef2 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -47,17 +47,19 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( assert_true(vertex_shader->is_translated()); assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); - uint32_t pixel_textures = - pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0; - uint32_t pixel_samplers = - pixel_shader != nullptr ? 
pixel_shader->GetSamplerCount() : 0; - uint32_t vertex_textures = vertex_shader->GetTextureSRVCount(); - uint32_t vertex_samplers = vertex_shader->GetSamplerCount(); + uint32_t pixel_texture_count = 0, pixel_sampler_count = 0; + if (pixel_shader != nullptr) { + pixel_shader->GetTextureSRVs(pixel_texture_count); + pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + } + uint32_t vertex_texture_count, vertex_sampler_count; + vertex_shader->GetTextureSRVs(vertex_texture_count); + vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); // Max 96 textures (if all kinds of tfetch instructions are used for all fetch // registers) and 32 samplers (one sampler per used fetch), but different // shader stages have different texture sets. - uint32_t index = pixel_textures | (pixel_samplers << 7) | - (vertex_textures << 12) | (vertex_samplers << 19); + uint32_t index = pixel_texture_count | (pixel_sampler_count << 7) | + (vertex_texture_count << 12) | (vertex_sampler_count << 19); // Try an existing root signature. auto it = root_signatures_.find(index); @@ -67,14 +69,16 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( // Create a new one. D3D12_ROOT_SIGNATURE_DESC desc; - D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_TwoStageTextures]; - D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_TwoStageTextures]; - desc.NumParameters = kRootParameter_Count_NoTextures; + D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_Max]; + D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_Max]; + desc.NumParameters = kRootParameter_Count_Base; desc.pParameters = parameters; desc.NumStaticSamplers = 0; desc.pStaticSamplers = nullptr; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + // Base parameters. + // Fetch constants. 
{ auto& parameter = parameters[kRootParameter_FetchConstants]; @@ -150,86 +154,70 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( range.OffsetInDescriptorsFromTableStart = 0; } - if (pixel_textures > 0 || vertex_textures > 0) { - desc.NumParameters = kRootParameter_Count_OneStageTextures; + // Extra parameters. - // Pixel or vertex textures. - { - auto& parameter = parameters[kRootParameter_PixelOrVertexTextures]; - auto& range = ranges[kRootParameter_PixelOrVertexTextures]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = &range; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - if (pixel_textures > 0) { - assert_true(pixel_samplers > 0); - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.NumDescriptors = pixel_textures; - } else { - assert_true(vertex_samplers > 0); - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.NumDescriptors = vertex_textures; - } - } + // Pixel textures. + if (pixel_texture_count > 0) { + auto& parameter = parameters[desc.NumParameters]; + auto& range = ranges[desc.NumParameters]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = &range; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = pixel_texture_count; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + ++desc.NumParameters; + } - // Pixel or vertex samplers.
- { - auto& parameter = parameters[kRootParameter_PixelOrVertexSamplers]; - auto& range = ranges[kRootParameter_PixelOrVertexSamplers]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = &range; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - if (pixel_samplers > 0) { - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - range.NumDescriptors = pixel_samplers; - } else { - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.NumDescriptors = vertex_samplers; - } - } + // Pixel samplers. + if (pixel_sampler_count > 0) { + auto& parameter = parameters[desc.NumParameters]; + auto& range = ranges[desc.NumParameters]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = &range; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range.NumDescriptors = pixel_sampler_count; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + ++desc.NumParameters; + } - if (pixel_textures > 0 && vertex_textures > 0) { - assert_true(vertex_samplers > 0); + // Vertex textures.
+ if (vertex_texture_count > 0) { + auto& parameter = parameters[desc.NumParameters]; + auto& range = ranges[desc.NumParameters]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = &range; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = vertex_texture_count; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + ++desc.NumParameters; + } - desc.NumParameters = kRootParameter_Count_TwoStageTextures; - - // Vertex textures. - { - auto& parameter = parameters[kRootParameter_VertexTextures]; - auto& range = ranges[kRootParameter_VertexTextures]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = &range; - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = vertex_textures; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - - // Vertex samplers. - { - auto& parameter = parameters[kRootParameter_VertexSamplers]; - auto& range = ranges[kRootParameter_VertexSamplers]; - parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = &range; - parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; - range.NumDescriptors = vertex_samplers; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; - } - } + // Vertex samplers.
+ if (vertex_sampler_count > 0) { + auto& parameter = parameters[desc.NumParameters]; + auto& range = ranges[desc.NumParameters]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = &range; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range.NumDescriptors = vertex_sampler_count; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + ++desc.NumParameters; } ID3DBlob* blob; @@ -239,7 +227,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( XELOGE( "Failed to serialize a root signature with %u pixel textures, %u " "pixel samplers, %u vertex textures and %u vertex samplers", - pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); + pixel_texture_count, pixel_sampler_count, vertex_texture_count, + vertex_sampler_count); if (error_blob != nullptr) { XELOGE("%s", reinterpret_cast<const char*>(error_blob->GetBufferPointer())); @@ -259,7 +248,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( XELOGE( "Failed to create a root signature with %u pixel textures, %u pixel " "samplers, %u vertex textures and %u vertex samplers", - pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); + pixel_texture_count, pixel_sampler_count, vertex_texture_count, + vertex_sampler_count); blob->Release(); return nullptr; } @@ -269,6 +259,42 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( return root_signature; } +uint32_t D3D12CommandProcessor::GetRootExtraParameterIndices( + const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, + RootExtraParameterIndices& indices_out) { + uint32_t pixel_texture_count = 0, pixel_sampler_count = 0; + if (pixel_shader != nullptr) { + pixel_shader->GetTextureSRVs(pixel_texture_count); + pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + }
+ uint32_t vertex_texture_count, vertex_sampler_count; + vertex_shader->GetTextureSRVs(vertex_texture_count); + vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); + + uint32_t index = kRootParameter_Count_Base; + if (pixel_texture_count != 0) { + indices_out.pixel_textures = index++; + } else { + indices_out.pixel_textures = RootExtraParameterIndices::kUnavailable; + } + if (pixel_sampler_count != 0) { + indices_out.pixel_samplers = index++; + } else { + indices_out.pixel_samplers = RootExtraParameterIndices::kUnavailable; + } + if (vertex_texture_count != 0) { + indices_out.vertex_textures = index++; + } else { + indices_out.vertex_textures = RootExtraParameterIndices::kUnavailable; + } + if (vertex_sampler_count != 0) { + indices_out.vertex_samplers = index++; + } else { + indices_out.vertex_samplers = RootExtraParameterIndices::kUnavailable; + } + return index; +} + uint64_t D3D12CommandProcessor::RequestViewDescriptors( uint64_t previous_full_update, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, @@ -331,9 +357,9 @@ uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( descriptor_index * GetD3D12Context()->GetD3D12Provider()->GetDescriptorSizeSampler(); cpu_handle_out.ptr = - view_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; + sampler_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; gpu_handle_out.ptr = - view_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; + sampler_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; return current_full_update; } @@ -447,6 +473,8 @@ bool D3D12CommandProcessor::SetupContext() { pipeline_cache_ = std::make_unique<PipelineCache>(this, register_file_); + texture_cache_ = std::make_unique<TextureCache>(this, register_file_); + return true; } @@ -469,6 +497,8 @@ void D3D12CommandProcessor::ShutdownContext() { view_heap_pool_.reset(); constant_buffer_pool_.reset(); + texture_cache_.reset(); +
pipeline_cache_.reset(); // Root signatures are used by pipelines, thus freed after the pipelines. @@ -526,6 +556,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, pipeline_cache_->ClearCache(); + texture_cache_->ClearCache(); + for (auto it : root_signatures_) { it.second->Release(); } @@ -1066,19 +1098,46 @@ bool D3D12CommandProcessor::UpdateBindings( // Bind the new root signature. if (current_graphics_root_signature_ != root_signature) { current_graphics_root_signature_ = root_signature; + GetRootExtraParameterIndices(vertex_shader, pixel_shader, + current_graphics_root_extras_); // We don't know which root parameters are up to date anymore. current_graphics_root_up_to_date_ = 0; command_list->SetGraphicsRootSignature(root_signature); } + // Get used textures and samplers. + uint32_t pixel_texture_count, pixel_sampler_count; + const D3D12Shader::TextureSRV* pixel_textures; + const uint32_t* pixel_samplers; + if (pixel_shader != nullptr) { + pixel_textures = pixel_shader->GetTextureSRVs(pixel_texture_count); + pixel_samplers = + pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + } else { + pixel_textures = nullptr; + pixel_texture_count = 0; + pixel_samplers = nullptr; + pixel_sampler_count = 0; + } + uint32_t vertex_texture_count, vertex_sampler_count; + const D3D12Shader::TextureSRV* vertex_textures = + vertex_shader->GetTextureSRVs(vertex_texture_count); + const uint32_t* vertex_samplers = + vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); + uint32_t texture_count = pixel_texture_count + vertex_texture_count; + uint32_t sampler_count = pixel_sampler_count + vertex_sampler_count; + // Begin updating descriptors. bool write_common_constant_views = false; bool write_vertex_float_constant_views = false; bool write_pixel_float_constant_views = false; bool write_fetch_constant_view = false; + // TODO(Triang3l): Update textures and samplers only if shaders or binding + // hash change.
+ bool write_textures = texture_count != 0; + bool write_samplers = sampler_count != 0; // Update constant buffers. - // TODO(Triang3l): Update the system constant buffer - will crash without it. if (!cbuffer_bindings_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->RequestFull( xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr, @@ -1145,7 +1204,7 @@ bool D3D12CommandProcessor::UpdateBindings( write_fetch_constant_view = true; } - // Update the descriptors. + // Allocate the descriptors. uint32_t view_count_partial_update = 0; if (write_common_constant_views) { // System and bool/loop constants. @@ -1163,8 +1222,11 @@ bool D3D12CommandProcessor::UpdateBindings( // Fetch constants. ++view_count_partial_update; } - // All the constants + shared memory. - uint32_t view_count_full_update = 20; + if (write_textures) { + view_count_partial_update += texture_count; + } + // All the constants + shared memory + textures and samplers. + uint32_t view_count_full_update = 20 + texture_count; D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; uint32_t view_handle_size = provider->GetDescriptorSizeView(); @@ -1172,16 +1234,30 @@ bool D3D12CommandProcessor::UpdateBindings( draw_view_full_update_, view_count_partial_update, view_count_full_update, view_cpu_handle, view_gpu_handle); if (view_full_update_index == 0) { - XELOGE("View full update index is 0!"); + XELOGE("Failed to allocate view descriptors!"); return false; } + D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {}; + D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; + uint32_t sampler_handle_size = provider->GetDescriptorSizeSampler(); + uint64_t sampler_full_update_index = 0; + if (sampler_count != 0) { + sampler_full_update_index = RequestSamplerDescriptors( + draw_sampler_full_update_, write_samplers ? 
sampler_count : 0, + sampler_count, sampler_cpu_handle, sampler_gpu_handle); + if (sampler_full_update_index == 0) { + XELOGE("Failed to allocate sampler descriptors!"); + return false; + } + } if (draw_view_full_update_ != view_full_update_index) { - // Need to update all descriptors. + // Need to update all view descriptors. draw_view_full_update_ = view_full_update_index; write_common_constant_views = true; write_vertex_float_constant_views = true; write_pixel_float_constant_views = true; write_fetch_constant_view = true; + write_textures = texture_count != 0; // If updating fully, write the shared memory descriptor (t0, space1). shared_memory_->CreateSRV(view_cpu_handle); gpu_handle_shared_memory_ = view_gpu_handle; @@ -1189,6 +1265,13 @@ bool D3D12CommandProcessor::UpdateBindings( view_gpu_handle.ptr += view_handle_size; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SharedMemory); } + if (sampler_count != 0 && + draw_sampler_full_update_ != sampler_full_update_index) { + draw_sampler_full_update_ = sampler_full_update_index; + write_samplers = true; + } + + // Write the descriptors. 
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_desc; if (write_common_constant_views) { gpu_handle_common_constants_ = view_gpu_handle; @@ -1249,6 +1332,62 @@ bool D3D12CommandProcessor::UpdateBindings( view_gpu_handle.ptr += view_handle_size; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants); } + if (write_textures) { + if (pixel_texture_count != 0) { + assert_true(current_graphics_root_extras_.pixel_textures != + RootExtraParameterIndices::kUnavailable); + gpu_handle_pixel_textures_ = view_gpu_handle; + for (uint32_t i = 0; i < pixel_texture_count; ++i) { + const D3D12Shader::TextureSRV& srv = pixel_textures[i]; + texture_cache_->WriteTextureSRV(srv.fetch_constant, srv.dimension, + view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + } + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_extras_.pixel_textures); + } + if (vertex_texture_count != 0) { + assert_true(current_graphics_root_extras_.vertex_textures != + RootExtraParameterIndices::kUnavailable); + gpu_handle_vertex_textures_ = view_gpu_handle; + for (uint32_t i = 0; i < vertex_texture_count; ++i) { + const D3D12Shader::TextureSRV& srv = vertex_textures[i]; + texture_cache_->WriteTextureSRV(srv.fetch_constant, srv.dimension, + view_cpu_handle); + view_cpu_handle.ptr += view_handle_size; + view_gpu_handle.ptr += view_handle_size; + } + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_extras_.vertex_textures); + } + } + if (write_samplers) { + if (pixel_sampler_count != 0) { + assert_true(current_graphics_root_extras_.pixel_samplers != + RootExtraParameterIndices::kUnavailable); + gpu_handle_pixel_samplers_ = sampler_gpu_handle; + for (uint32_t i = 0; i < pixel_sampler_count; ++i) { + texture_cache_->WriteSampler(pixel_samplers[i], sampler_cpu_handle); + sampler_cpu_handle.ptr += sampler_handle_size; + sampler_gpu_handle.ptr += sampler_handle_size; + } + current_graphics_root_up_to_date_ &= 
+ ~(1u << current_graphics_root_extras_.pixel_samplers); + } + if (vertex_sampler_count != 0) { + assert_true(current_graphics_root_extras_.vertex_samplers != + RootExtraParameterIndices::kUnavailable); + gpu_handle_vertex_samplers_ = sampler_gpu_handle; + for (uint32_t i = 0; i < vertex_sampler_count; ++i) { + texture_cache_->WriteSampler(vertex_samplers[i], sampler_cpu_handle); + sampler_cpu_handle.ptr += sampler_handle_size; + sampler_gpu_handle.ptr += sampler_handle_size; + } + current_graphics_root_up_to_date_ &= + ~(1u << current_graphics_root_extras_.vertex_samplers); + } + } // Update the root parameters. if (!(current_graphics_root_up_to_date_ & @@ -1284,6 +1423,35 @@ bool D3D12CommandProcessor::UpdateBindings( gpu_handle_shared_memory_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_SharedMemory; } + uint32_t extra_index; + extra_index = current_graphics_root_extras_.pixel_textures; + if (extra_index != RootExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + command_list->SetGraphicsRootDescriptorTable(extra_index, + gpu_handle_pixel_textures_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_extras_.pixel_samplers; + if (extra_index != RootExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + command_list->SetGraphicsRootDescriptorTable(extra_index, + gpu_handle_pixel_samplers_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_extras_.vertex_textures; + if (extra_index != RootExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + command_list->SetGraphicsRootDescriptorTable(extra_index, + gpu_handle_vertex_textures_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } + extra_index = current_graphics_root_extras_.vertex_samplers; + if (extra_index != 
RootExtraParameterIndices::kUnavailable && + !(current_graphics_root_up_to_date_ & (1u << extra_index))) { + command_list->SetGraphicsRootDescriptorTable(extra_index, + gpu_handle_vertex_samplers_); + current_graphics_root_up_to_date_ |= 1u << extra_index; + } return true; } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 2d954ca87..e6f3f95d3 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -110,32 +110,30 @@ class D3D12CommandProcessor : public CommandProcessor { // Never changed - shared memory byte address buffer (t0, space1). kRootParameter_SharedMemory, - kRootParameter_Count_NoTextures, + kRootParameter_Count_Base, - // These are there only if textures are fetched (they are changed pretty - // frequently, but for the ease of maintenance they're in the end). - // If the pixel shader samples textures, these are for pixel textures - // (changed more frequently), otherwise, if the vertex shader samples - // textures, these are for vertex textures. + // Extra parameter that may or may not exist: + // - Pixel textures. + // - Pixel samplers. + // - Vertex textures. + // - Vertex samplers. - // Used textures of all types (t0+, space0). - kRootParameter_PixelOrVertexTextures = kRootParameter_Count_NoTextures, - // Used samplers (s0+). - kRootParameter_PixelOrVertexSamplers, - - kRootParameter_Count_OneStageTextures, - - // These are only present if both pixel and vertex shaders sample textures - // for vertex textures. - - // Used textures of all types (t0+, space0). - kRootParameter_VertexTextures = kRootParameter_Count_OneStageTextures, - // Used samplers (s0+). 
- kRootParameter_VertexSamplers, - - kRootParameter_Count_TwoStageTextures, + kRootParameter_Count_Max = kRootParameter_Count_Base + 4, }; + struct RootExtraParameterIndices { + uint32_t pixel_textures; + uint32_t pixel_samplers; + uint32_t vertex_textures; + uint32_t vertex_samplers; + static constexpr uint32_t kUnavailable = UINT32_MAX; + }; + // Gets the indices of optional root parameters. Returns the total parameter + // count. + static uint32_t GetRootExtraParameterIndices( + const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, + RootExtraParameterIndices& indices_out); + // Returns true if a new frame was started. bool BeginFrame(); // Returns true if an open frame was ended. @@ -162,6 +160,8 @@ class D3D12CommandProcessor : public CommandProcessor { std::unique_ptr pipeline_cache_ = nullptr; + std::unique_ptr texture_cache_ = nullptr; + std::unique_ptr constant_buffer_pool_ = nullptr; std::unique_ptr view_heap_pool_ = nullptr; std::unique_ptr sampler_heap_pool_ = nullptr; @@ -194,6 +194,8 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12PipelineState* current_pipeline_; // Currently bound graphics root signature. ID3D12RootSignature* current_graphics_root_signature_; + // Extra parameters which may or may not be present. + RootExtraParameterIndices current_graphics_root_extras_; // Whether root parameters are up to date - reset if a new signature is bound. uint32_t current_graphics_root_up_to_date_; @@ -225,6 +227,10 @@ class D3D12CommandProcessor : public CommandProcessor { D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_pixel_float_constants_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_pixel_textures_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_pixel_samplers_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_vertex_textures_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_vertex_samplers_; // Current primitive topology. 
D3D_PRIMITIVE_TOPOLOGY primitive_topology_; diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index 5573392d5..1e049dad4 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -32,6 +32,27 @@ D3D12Shader::~D3D12Shader() { } } +void D3D12Shader::SetTexturesAndSamplers( + const HlslShaderTranslator::TextureSRV* texture_srvs, + uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, + uint32_t sampler_count) { + for (uint32_t i = 0; i < texture_srv_count; ++i) { + TextureSRV& srv = texture_srvs_[i]; + const HlslShaderTranslator::TextureSRV& translator_srv = texture_srvs[i]; + srv.fetch_constant = translator_srv.fetch_constant; + srv.dimension = translator_srv.dimension; + } + texture_srv_count_ = texture_srv_count; + // If there's a texture, there's a sampler for it. + used_texture_mask_ = 0; + for (uint32_t i = 0; i < sampler_count; ++i) { + uint32_t sampler_fetch_constant = sampler_fetch_constants[i]; + sampler_fetch_constants_[i] = sampler_fetch_constant; + used_texture_mask_ |= 1u << sampler_fetch_constant; + } + sampler_count_ = sampler_count; +} + bool D3D12Shader::Prepare() { assert_null(blob_); assert_true(is_valid()); diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index 893548794..a38ac6f02 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -10,6 +10,7 @@ #ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ #define XENIA_GPU_D3D12_D3D12_SHADER_H_ +#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/shader.h" #include "xenia/ui/d3d12/d3d12_api.h" @@ -23,17 +24,39 @@ class D3D12Shader : public Shader { const uint32_t* dword_ptr, uint32_t dword_count); ~D3D12Shader() override; + void SetTexturesAndSamplers( + const HlslShaderTranslator::TextureSRV* texture_srvs, + uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, + uint32_t sampler_count); + bool Prepare(); const uint8_t* 
GetDXBC() const; size_t GetDXBCSize() const; - // TODO(Triang3l): Real texture counts. - uint32_t GetTextureSRVCount() const { return 0; } - uint32_t GetSamplerCount() const { return 0; } + struct TextureSRV { + uint32_t fetch_constant; + TextureDimension dimension; + }; + const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { + count_out = texture_srv_count_; + return texture_srvs_; + } + const uint32_t* GetSamplerFetchConstants(uint32_t& count_out) const { + count_out = sampler_count_; + return sampler_fetch_constants_; + } + const uint32_t GetUsedTextureMask() const { return used_texture_mask_; } private: ID3DBlob* blob_ = nullptr; + + // Up to 32 2D array textures, 32 3D textures and 32 cube textures. + TextureSRV texture_srvs_[96]; + uint32_t texture_srv_count_ = 0; + uint32_t sampler_fetch_constants_[32]; + uint32_t sampler_count_ = 0; + uint32_t used_texture_mask_ = 0; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 1a50c75cb..1aed37a68 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -27,7 +27,7 @@ namespace d3d12 { PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file) : command_processor_(command_processor), register_file_(register_file) { - shader_translator_.reset(new HlslShaderTranslator()); + shader_translator_ = std::make_unique<HlslShaderTranslator>(); // Set pipeline state description values we never change.
// Zero out tessellation, stream output, blend state and formats for render @@ -158,6 +158,14 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, return false; } + uint32_t texture_srv_count, sampler_count; + const HlslShaderTranslator::TextureSRV* texture_srvs = + shader_translator_->GetTextureSRVs(texture_srv_count); + const uint32_t* sampler_fetch_constants = + shader_translator_->GetSamplerFetchConstants(sampler_count); + shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, + sampler_fetch_constants, sampler_count); + // Prepare the shader for use (creates the Shader Model bytecode). // It could still fail at this point. if (!shader->Prepare()) { diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 5f4cded64..8a3d89e09 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -15,8 +15,8 @@ #include "third_party/xxhash/xxhash.h" #include "xenia/gpu/d3d12/d3d12_shader.h" +#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/register_file.h" -#include "xenia/gpu/shader_translator.h" #include "xenia/gpu/xenos.h" namespace xe { @@ -81,7 +81,7 @@ class PipelineCache { RegisterFile* register_file_; // Reusable shader translator. - std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr; + std::unique_ptr<HlslShaderTranslator> shader_translator_ = nullptr; // All loaded shaders mapped by their guest hash key. std::unordered_map shader_map_; diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc new file mode 100644 index 000000000..c59072af2 --- /dev/null +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -0,0 +1,87 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/d3d12/texture_cache.h" + +#include "xenia/gpu/d3d12/d3d12_command_processor.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +TextureCache::TextureCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file) + : command_processor_(command_processor), register_file_(register_file) {} + +TextureCache::~TextureCache() { Shutdown(); } + +void TextureCache::Shutdown() { ClearCache(); } + +void TextureCache::ClearCache() {} + +void TextureCache::WriteTextureSRV(uint32_t fetch_constant, + TextureDimension shader_dimension, + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + // TODO(Triang3l): Real texture descriptors instead of null. + D3D12_SHADER_RESOURCE_VIEW_DESC desc; + desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (shader_dimension) { + case TextureDimension::k3D: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + desc.Texture3D.MostDetailedMip = 0; + desc.Texture3D.MipLevels = UINT32_MAX; + desc.Texture3D.ResourceMinLODClamp = 0.0f; + break; + case TextureDimension::kCube: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + desc.TextureCube.MostDetailedMip = 0; + desc.TextureCube.MipLevels = UINT32_MAX; + desc.TextureCube.ResourceMinLODClamp = 0.0f; + break; + default: + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + desc.Texture2DArray.MostDetailedMip = 0; + desc.Texture2DArray.MipLevels = UINT32_MAX; + desc.Texture2DArray.FirstArraySlice = 0; + desc.Texture2DArray.ArraySize = 1; + desc.Texture2DArray.PlaneSlice = 0; + desc.Texture2DArray.ResourceMinLODClamp = 0.0f; + break; + } + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + device->CreateShaderResourceView(nullptr, &desc, handle); +} + +void TextureCache::WriteSampler(uint32_t fetch_constant, + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + // 
TODO(Triang3l): Real samplers instead of this dummy. + D3D12_SAMPLER_DESC desc; + desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + desc.MipLODBias = 0.0f; + desc.MaxAnisotropy = 1; + desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + desc.BorderColor[0] = 0.0f; + desc.BorderColor[1] = 0.0f; + desc.BorderColor[2] = 0.0f; + desc.BorderColor[3] = 0.0f; + desc.MinLOD = 0.0f; + desc.MaxLOD = 0.0f; + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + device->CreateSampler(&desc, handle); +} + +} // namespace d3d12 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h new file mode 100644 index 000000000..2013d0837 --- /dev/null +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -0,0 +1,72 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D12_TEXTURE_CACHE_H_ +#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +class D3D12CommandProcessor; + +// Manages host copies of guest textures, performing untiling, format and endian +// conversion of textures stored in the shared memory, and also handling +// invalidation. 
+// +// Mipmaps are treated in the following way, according to the GPU hang message +// found in game executables explaining the valid usage of BaseAddress when +// streaming the largest LOD (it says games should not use 0 as the base address +// when the largest LOD isn't loaded, but rather, either allocate a valid +// address for it or make it the same as MipAddress): +// - If the texture has a base address, but no mip address, it's not mipmapped - +// the host texture has only the largest level too. +// - If the texture has different non-zero base and mip addresses, a host +// texture with a full mipmap pyramid is created, disregarding min/max LOD and +// treating it purely as sampler state because there are tfetch instructions +// working directly with LOD values - including fetching with an explicit LOD. +// - If the texture has a mip address, but the base address is 0 or the same as +// the mip address, a fully mipmapped texture is created, but min/max LOD is +// clamped to 1 - the game is expected to do that anyway until the largest LOD +// is loaded. +// TODO(Triang3l): Check if there are any games with BaseAddress==MipAddress +// but min or max LOD being 0, especially check Modern Warfare 2/3. +// TODO(Triang3l): Attach the largest LOD to existing textures with a valid +// MipAddress but no BaseAddress to save memory because textures are streamed +// this way anyway. 
+class TextureCache { + public: + TextureCache(D3D12CommandProcessor* command_processor, + RegisterFile* register_file); + ~TextureCache(); + + void Shutdown(); + + void ClearCache(); + + void WriteTextureSRV(uint32_t fetch_constant, + TextureDimension shader_dimension, + D3D12_CPU_DESCRIPTOR_HANDLE handle); + void WriteSampler(uint32_t fetch_constant, + D3D12_CPU_DESCRIPTOR_HANDLE handle); + + private: + D3D12CommandProcessor* command_processor_; + RegisterFile* register_file_; +}; + +} // namespace d3d12 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_D3D12_TEXTURE_CACHE_H_ diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 8e51a7b21..51f6ead50 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -40,8 +40,7 @@ void HlslShaderTranslator::Reset() { writes_depth_ = false; - srv_bindings_.clear(); - + texture_srv_count_ = 0; sampler_count_ = 0; cube_used_ = false; @@ -192,6 +191,37 @@ std::vector HlslShaderTranslator::CompleteTranslation() { "xe_float_constants[8] : register(b2);\n" "\n"); + // Textures and samplers. 
+ for (uint32_t i = 0; i < texture_srv_count_; ++i) { + const TextureSRV& srv = texture_srvs_[i]; + const char* srv_type_dimension; + const char* srv_name_suffix; + switch (srv.dimension) { + case TextureDimension::k3D: + srv_type_dimension = "3D"; + srv_name_suffix = "3d"; + break; + case TextureDimension::kCube: + srv_type_dimension = "Cube"; + srv_name_suffix = "cube"; + break; + default: + srv_type_dimension = "2DArray"; + srv_name_suffix = "2d"; + break; + } + source.AppendFormat( + "Texture%s xe_texture%u_%s : register(t%u, space0);\n", + srv_type_dimension, srv.fetch_constant, srv_name_suffix, i); + } + for (uint32_t i = 0; i < sampler_count_; ++i) { + source.AppendFormat("SamplerState xe_sampler%u : register(s%u);\n", + sampler_fetch_constants_[i], i); + } + if (texture_srv_count_ != 0 || sampler_count_ != 0) { + source.Append("\n"); + } + if (is_vertex_shader()) { // Vertex fetching, output and prologue. // Endian register (2nd word of the fetch constant) is 00 for no swap, 01 @@ -301,6 +331,10 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // Loop counter stack, .x is the active loop. // Represents number of times remaining to loop. " uint4 xe_loop_count = uint4(0u, 0u, 0u, 0u);\n" + // Coordinates for texture fetches. + " float3 xe_texture_coords = float3(0.0, 0.0, 0.0);\n" + // LOD for UseRegisterLOD texture fetches. + " float xe_texture_lod = 0.0f;\n" // Master loop and switch for flow control. 
" uint xe_pc = 0u;\n" "\n" @@ -1068,19 +1102,22 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( EndPredicatedInstruction(conditional_emitted); } -uint32_t HlslShaderTranslator::AddSRVBinding(SRVType type, - uint32_t fetch_constant) { - for (uint32_t i = 0; i < srv_bindings_.size(); ++i) { - const SRVBinding& binding = srv_bindings_[i]; - if (binding.type == type && binding.fetch_constant == fetch_constant) { +uint32_t HlslShaderTranslator::AddTextureSRV(uint32_t fetch_constant, + TextureDimension dimension) { + if (dimension == TextureDimension::k1D) { + // 1D textures are treated as 2D. + dimension = TextureDimension::k2D; + } + for (uint32_t i = 0; i < texture_srv_count_; ++i) { + const TextureSRV& srv = texture_srvs_[i]; + if (srv.fetch_constant == fetch_constant && srv.dimension == dimension) { return i; } } - SRVBinding new_binding; - new_binding.type = type; - new_binding.fetch_constant = fetch_constant; - srv_bindings_.push_back(new_binding); - return uint32_t(srv_bindings_.size() - 1); + TextureSRV& new_srv = texture_srvs_[texture_srv_count_]; + new_srv.fetch_constant = fetch_constant; + new_srv.dimension = dimension; + return texture_srv_count_++; } uint32_t HlslShaderTranslator::AddSampler(uint32_t fetch_constant) { @@ -1101,6 +1138,19 @@ void HlslShaderTranslator::ProcessTextureFetchInstruction( bool conditional_emitted = BeginPredicatedInstruction( instr.is_predicated, instr.predicate_condition); + if (instr.opcode == FetchOpcode::kSetTextureLod) { + // TODO(Triang3l): Set xe_lod to the src1. + } else { + uint32_t tfetch_index = instr.operands[1].storage_index; + AddTextureSRV(tfetch_index, instr.dimension); + if (instr.dimension == TextureDimension::k3D) { + // tfetch3D is used for both 3D textures and 2D texture arrays, this is + // chosen dynamically. + AddTextureSRV(tfetch_index, TextureDimension::k2D); + } + AddSampler(tfetch_index); + } + // TODO(Triang3l): Texture fetch when textures are added. 
EmitSourceDepth("xe_pv = (1.0).xxxx;\n"); diff --git a/src/xenia/gpu/hlsl_shader_translator.h b/src/xenia/gpu/hlsl_shader_translator.h index 3488800ae..95b3d2bc4 100644 --- a/src/xenia/gpu/hlsl_shader_translator.h +++ b/src/xenia/gpu/hlsl_shader_translator.h @@ -40,21 +40,18 @@ class HlslShaderTranslator : public ShaderTranslator { uint32_t textures_are_3d; }; - enum class SRVType : uint32_t { - // 1D, 2D or stacked texture bound as a 2D array texture. - Texture2DArray, - // 3D texture (also has a 2D array view of the same fetch registers because - // tfetch3D is used for both stacked and 3D textures. - Texture3D, - // Cube texture. - TextureCube - }; - - struct SRVBinding { - SRVType type : 2; - // 0-31 for textures, 0-95 for vertex buffers. - uint32_t fetch_constant : 7; + struct TextureSRV { + uint32_t fetch_constant; + TextureDimension dimension; }; + const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { + count_out = texture_srv_count_; + return texture_srvs_; + } + const uint32_t* GetSamplerFetchConstants(uint32_t& count_out) const { + count_out = sampler_count_; + return sampler_fetch_constants_; + } protected: void Reset() override; @@ -108,9 +105,11 @@ class HlslShaderTranslator : public ShaderTranslator { bool writes_depth_ = false; - std::vector<SRVBinding> srv_bindings_; - // Finds or adds an SRV binding to the shader's SRV list, returns t# index. - uint32_t AddSRVBinding(SRVType type, uint32_t fetch_constant); + // Up to 32 2D array textures, 32 3D textures and 32 cube textures. + TextureSRV texture_srvs_[96]; + uint32_t texture_srv_count_ = 0; + // Finds or adds a texture binding to the shader's SRV list, returns t# index. + uint32_t AddTextureSRV(uint32_t fetch_constant, TextureDimension dimension); // Sampler index -> fetch constant index mapping. // TODO(Triang3l): On binding tier 1 (Nvidia Fermi), there can't be more than