/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project
 ******************************************************************************
 * Copyright 2018 Ben Vanik. All rights reserved.
 *
 * Released under the BSD license - see LICENSE in the root for more details.
 ******************************************************************************
 */

#include "xenia/gpu/d3d12/d3d12_command_processor.h"

#include <gflags/gflags.h>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <memory>

#include "third_party/xxhash/xxhash.h"

#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_util.h"

// Some games (such as Banjo-Kazooie) are not aware of the half-pixel offset
// and may be blurry or have texture sampling artifacts; in this case the user
// may disable the half-pixel offset by setting this to false.
DEFINE_bool(d3d12_half_pixel_offset, true,
            "Enable half-pixel vertex and VPOS offset");
// Disabled because the current positions look worse than sampling at centers.
DEFINE_bool(d3d12_programmable_sample_positions, false,
            "Enable custom SSAA sample positions where available");
DEFINE_bool(d3d12_rov, false,
            "Use rasterizer-ordered views for render target emulation where "
            "available.");

namespace xe {
namespace gpu {
namespace d3d12 {

constexpr uint32_t
    D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureHeight;
constexpr uint32_t D3D12CommandProcessor::kScratchBufferSizeIncrement;

D3D12CommandProcessor::D3D12CommandProcessor(
    D3D12GraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
    : CommandProcessor(graphics_system, kernel_state) {}
D3D12CommandProcessor::~D3D12CommandProcessor() = default;

void D3D12CommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();
  cache_clear_requested_ = true;
}

void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
  // Capture with PIX if attached.
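  // IDXGraphicsAnalysis is only obtainable when the process is running under
  // PIX (or another graphics diagnostics tool), so a non-null interface here
  // means a programmatic BeginCapture/EndCapture pass can be taken around the
  // next frame instead of falling back to the base command processor's own
  // trace writer.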
if (GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis() != nullptr) { pix_capture_requested_.store(true, std::memory_order_relaxed); return; } CommandProcessor::RequestFrameTrace(root_path); } ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList() const { assert_true(current_queue_frame_ != UINT_MAX); if (current_queue_frame_ == UINT_MAX) { return nullptr; } return command_lists_[current_queue_frame_]->GetCommandList(); } ID3D12GraphicsCommandList1* D3D12CommandProcessor::GetCurrentCommandList1() const { assert_true(current_queue_frame_ != UINT_MAX); if (current_queue_frame_ == UINT_MAX) { return nullptr; } return command_lists_[current_queue_frame_]->GetCommandList1(); } bool D3D12CommandProcessor::IsROVUsedForEDRAM() const { if (!FLAGS_d3d12_rov) { return false; } auto provider = GetD3D12Context()->GetD3D12Provider(); return provider->AreRasterizerOrderedViewsSupported(); } uint32_t D3D12CommandProcessor::GetCurrentColorMask( const D3D12Shader* pixel_shader) const { if (pixel_shader == nullptr) { return 0; } auto& regs = *register_file_; uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; for (uint32_t i = 0; i < 4; ++i) { if (!pixel_shader->writes_color_target(i)) { color_mask &= ~(0xF << (i * 4)); } } return color_mask; } void D3D12CommandProcessor::PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, D3D12_RESOURCE_STATES new_state, UINT subresource) { if (old_state == new_state) { return; } D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Transition.pResource = resource; barrier.Transition.Subresource = subresource; barrier.Transition.StateBefore = old_state; barrier.Transition.StateAfter = new_state; barriers_.push_back(barrier); } void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource, ID3D12Resource* new_resource) { D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Aliasing.pResourceBefore = old_resource; barrier.Aliasing.pResourceAfter = new_resource; barriers_.push_back(barrier); } void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) { D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.UAV.pResource = resource; barriers_.push_back(barrier); } void D3D12CommandProcessor::SubmitBarriers() { UINT barrier_count = UINT(barriers_.size()); if (barrier_count != 0) { GetCurrentCommandList()->ResourceBarrier(barrier_count, barriers_.data()); barriers_.clear(); } } ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { assert_true(vertex_shader->is_translated()); assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); uint32_t texture_count_vertex, sampler_count_vertex; vertex_shader->GetTextureSRVs(texture_count_vertex); vertex_shader->GetSamplerBindings(sampler_count_vertex); uint32_t texture_count_pixel = 0, sampler_count_pixel = 0; if (pixel_shader != nullptr) { pixel_shader->GetTextureSRVs(texture_count_pixel); pixel_shader->GetSamplerBindings(sampler_count_pixel); } // Better put the pixel texture/sampler in the lower bits probably because it // changes often. 
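  // The root signature cache key packs the four binding counts into a single
  // 32-bit value, from the low bits up:
  //   [pixel texture count][pixel sampler count]
  //   [vertex texture count][vertex sampler count]
  // each field kMaxTextureSRVIndexBits or kMaxSamplerBindingIndexBits wide,
  // so shaders that need the same descriptor table shapes share one root
  // signature.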
  uint32_t index = 0;
  uint32_t index_offset = 0;
  index |= texture_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxTextureSRVIndexBits;
  index |= sampler_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  index |= texture_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxTextureSRVIndexBits;
  index |= sampler_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  assert_true(index_offset <= 32);

  // Try an existing root signature.
  auto it = root_signatures_.find(index);
  if (it != root_signatures_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_Max];
  D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_Max];
  desc.NumParameters = kRootParameter_Count_Base;
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Base parameters.

  // Fetch constants.
  {
    auto& parameter = parameters[kRootParameter_FetchConstants];
    auto& range = ranges[kRootParameter_FetchConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    range.NumDescriptors = 1;
    range.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
  }

  // Vertex float constants.
  {
    auto& parameter = parameters[kRootParameter_FloatConstantsVertex];
    auto& range = ranges[kRootParameter_FloatConstantsVertex];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    range.NumDescriptors = 1;
    range.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
  }

  // Pixel float constants.
  {
    auto& parameter = parameters[kRootParameter_FloatConstantsPixel];
    auto& range = ranges[kRootParameter_FloatConstantsPixel];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    range.NumDescriptors = 1;
    range.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
  }

  // System constants.
  {
    auto& parameter = parameters[kRootParameter_SystemConstants];
    auto& range = ranges[kRootParameter_SystemConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    range.NumDescriptors = 1;
    range.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
  }

  // Bool and loop constants.
  {
    auto& parameter = parameters[kRootParameter_BoolLoopConstants];
    auto& range = ranges[kRootParameter_BoolLoopConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
    range.NumDescriptors = 1;
    range.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
  }

  // Shared memory and, if ROVs are used, EDRAM.
  D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[2];
  {
    auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges =
        shared_memory_and_edram_ranges;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    shared_memory_and_edram_ranges[0].RangeType =
        D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    shared_memory_and_edram_ranges[0].NumDescriptors = 1;
    shared_memory_and_edram_ranges[0].BaseShaderRegister = 0;
    shared_memory_and_edram_ranges[0].RegisterSpace = 0;
    shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
    if (IsROVUsedForEDRAM()) {
      ++parameter.DescriptorTable.NumDescriptorRanges;
      shared_memory_and_edram_ranges[1].RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
      shared_memory_and_edram_ranges[1].NumDescriptors = 1;
      shared_memory_and_edram_ranges[1].BaseShaderRegister = 0;
      shared_memory_and_edram_ranges[1].RegisterSpace = 0;
      shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
    }
  }

  // Extra parameters.

  // Pixel textures.
  if (texture_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    auto& range = ranges[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range.NumDescriptors = texture_count_pixel;
    range.BaseShaderRegister = 1;
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Pixel samplers.
  if (sampler_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    auto& range = ranges[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range.NumDescriptors = sampler_count_pixel;
    range.BaseShaderRegister = 0;
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex textures.
  if (texture_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    auto& range = ranges[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range.NumDescriptors = texture_count_vertex;
    range.BaseShaderRegister = 1;
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex samplers.
  if (sampler_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    auto& range = ranges[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range.NumDescriptors = sampler_count_vertex;
    range.BaseShaderRegister = 0;
    range.RegisterSpace = 0;
    range.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  ID3D12RootSignature* root_signature = ui::d3d12::util::CreateRootSignature(
      GetD3D12Context()->GetD3D12Provider(), desc);
  if (root_signature == nullptr) {
    XELOGE(
        "Failed to create a root signature with %u pixel textures, %u pixel "
        "samplers, %u vertex textures and %u vertex samplers",
        texture_count_pixel, sampler_count_pixel, texture_count_vertex,
        sampler_count_vertex);
    return nullptr;
  }
  root_signatures_.insert({index, root_signature});
  return root_signature;
}

uint32_t D3D12CommandProcessor::GetRootExtraParameterIndices(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
    RootExtraParameterIndices& indices_out) {
  uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
  if (pixel_shader != nullptr) {
    pixel_shader->GetTextureSRVs(texture_count_pixel);
    pixel_shader->GetSamplerBindings(sampler_count_pixel);
  }
  uint32_t texture_count_vertex, sampler_count_vertex;
  vertex_shader->GetTextureSRVs(texture_count_vertex);
  vertex_shader->GetSamplerBindings(sampler_count_vertex);

  uint32_t index = kRootParameter_Count_Base;
  if (texture_count_pixel != 0) {
    indices_out.textures_pixel = index++;
  } else {
    indices_out.textures_pixel = RootExtraParameterIndices::kUnavailable;
  }
  if (sampler_count_pixel != 0) {
    indices_out.samplers_pixel = index++;
  } else {
    indices_out.samplers_pixel = RootExtraParameterIndices::kUnavailable;
  }
  if (texture_count_vertex != 0) {
    indices_out.textures_vertex = index++;
  } else {
    indices_out.textures_vertex = RootExtraParameterIndices::kUnavailable;
  }
  if (sampler_count_vertex != 0) {
    indices_out.samplers_vertex = index++;
  } else {
    indices_out.samplers_vertex = RootExtraParameterIndices::kUnavailable;
  }
  return index;
}

uint64_t D3D12CommandProcessor::RequestViewDescriptors(
    uint64_t previous_full_update, uint32_t count_for_partial_update,
    uint32_t count_for_full_update,
    D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  uint32_t descriptor_index;
  uint64_t current_full_update =
      view_heap_pool_->Request(previous_full_update, count_for_partial_update,
                               count_for_full_update, descriptor_index);
  if (current_full_update == 0) {
    // There was an error.
    return 0;
  }
  ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap();
  if (current_view_heap_ != heap) {
    // Bind the new descriptor heaps if needed.
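    // SetDescriptorHeaps replaces the entire set of shader-visible heaps bound
    // to the command list (at most one CBV/SRV/UAV heap and one sampler heap),
    // so the currently bound sampler heap has to be passed again alongside the
    // new view heap to keep it bound.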
current_view_heap_ = heap; ID3D12DescriptorHeap* heaps[2]; uint32_t heap_count = 0; heaps[heap_count++] = heap; if (current_sampler_heap_ != nullptr) { heaps[heap_count++] = current_sampler_heap_; } GetCurrentCommandList()->SetDescriptorHeaps(heap_count, heaps); } auto provider = GetD3D12Context()->GetD3D12Provider(); cpu_handle_out = provider->OffsetViewDescriptor( view_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index); gpu_handle_out = provider->OffsetViewDescriptor( view_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index); return current_full_update; } uint64_t D3D12CommandProcessor::RequestSamplerDescriptors( uint64_t previous_full_update, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { uint32_t descriptor_index; uint64_t current_full_update = sampler_heap_pool_->Request( previous_full_update, count_for_partial_update, count_for_full_update, descriptor_index); if (current_full_update == 0) { // There was an error. return 0; } ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap(); if (current_sampler_heap_ != heap) { // Bind the new descriptor heaps if needed. current_sampler_heap_ = heap; ID3D12DescriptorHeap* heaps[2]; uint32_t heap_count = 0; heaps[heap_count++] = heap; if (current_view_heap_ != nullptr) { heaps[heap_count++] = current_view_heap_; } GetCurrentCommandList()->SetDescriptorHeaps(heap_count, heaps); } uint32_t descriptor_offset = descriptor_index * GetD3D12Context()->GetD3D12Provider()->GetSamplerDescriptorSize(); cpu_handle_out.ptr = sampler_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset; gpu_handle_out.ptr = sampler_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset; return current_full_update; } ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( uint32_t size, D3D12_RESOURCE_STATES state) { assert_true(current_queue_frame_ != UINT_MAX); assert_false(scratch_buffer_used_); if (current_queue_frame_ == UINT_MAX || scratch_buffer_used_ || size == 0) { return nullptr; } if (size <= scratch_buffer_size_) { PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state); scratch_buffer_state_ = state; scratch_buffer_used_ = true; return scratch_buffer_; } size = xe::align(size, kScratchBufferSizeIncrement); auto context = GetD3D12Context(); auto device = context->GetD3D12Provider()->GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a %u MB scratch GPU buffer", size >> 20); return nullptr; } if (scratch_buffer_ != nullptr) { BufferForDeletion buffer_for_deletion; buffer_for_deletion.buffer = scratch_buffer_; buffer_for_deletion.last_usage_frame = GetD3D12Context()->GetCurrentFrame(); buffers_for_deletion_.push_back(buffer_for_deletion); } scratch_buffer_ = buffer; scratch_buffer_size_ = size; scratch_buffer_state_ = state; scratch_buffer_used_ = true; return scratch_buffer_; } void D3D12CommandProcessor::ReleaseScratchGPUBuffer( ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) { assert_true(current_queue_frame_ != UINT_MAX); assert_true(scratch_buffer_used_); scratch_buffer_used_ = false; if (buffer == scratch_buffer_) { scratch_buffer_state_ = new_state; } } void 
D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) { if (current_sample_positions_ == sample_positions) { return; } if (FLAGS_d3d12_programmable_sample_positions) { auto provider = GetD3D12Context()->GetD3D12Provider(); auto tier = provider->GetProgrammableSamplePositionsTier(); auto command_list = GetCurrentCommandList1(); if (tier >= 2 && command_list != nullptr) { // Depth buffer transitions are affected by sample positions. SubmitBarriers(); // Standard sample positions in Direct3D 10.1, but adjusted to take the // fact that SSAA samples are already shifted by 1/4 of a pixel. // TODO(Triang3l): Find what sample positions are used by Xenos, though // they are not necessarily better. The purpose is just to make 2x SSAA // work a little bit better for tall stairs. // FIXME(Triang3l): This is currently even uglier than without custom // sample positions. if (sample_positions >= MsaaSamples::k2X) { // Sample 1 is lower-left on Xenos, but upper-right in Direct3D 12. D3D12_SAMPLE_POSITION d3d_sample_positions[4]; if (sample_positions >= MsaaSamples::k4X) { // Upper-left. d3d_sample_positions[0].X = -2 + 4; d3d_sample_positions[0].Y = -6 + 4; // Upper-right. d3d_sample_positions[1].X = 6 - 4; d3d_sample_positions[1].Y = -2 + 4; // Lower-left. d3d_sample_positions[2].X = -6 + 4; d3d_sample_positions[2].Y = 2 - 4; // Lower-right. d3d_sample_positions[3].X = 2 - 4; d3d_sample_positions[3].Y = 6 - 4; } else { // Upper. d3d_sample_positions[0].X = -4; d3d_sample_positions[0].Y = -4 + 4; d3d_sample_positions[1].X = -4; d3d_sample_positions[1].Y = -4 + 4; // Lower. d3d_sample_positions[2].X = 4; d3d_sample_positions[2].Y = 4 - 4; d3d_sample_positions[3].X = 4; d3d_sample_positions[3].Y = 4 - 4; } command_list->SetSamplePositions(1, 4, d3d_sample_positions); } else { command_list->SetSamplePositions(0, 0, nullptr); } } } current_sample_positions_ = sample_positions; } void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) { if (current_pipeline_ != pipeline) { GetCurrentCommandList()->SetPipelineState(pipeline); current_pipeline_ = pipeline; } } void D3D12CommandProcessor::UnbindRenderTargets() { render_target_cache_->UnbindRenderTargets(); } void D3D12CommandProcessor::SetExternalGraphicsPipeline( ID3D12PipelineState* pipeline, bool reset_viewport, bool reset_blend_factor, bool reset_stencil_ref) { if (current_pipeline_ != pipeline) { GetCurrentCommandList()->SetPipelineState(pipeline); current_pipeline_ = pipeline; } current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; if (reset_viewport) { ff_viewport_update_needed_ = true; ff_scissor_update_needed_ = true; } if (reset_blend_factor) { ff_blend_factor_update_needed_ = true; } if (reset_stencil_ref) { ff_stencil_ref_update_needed_ = true; } } bool D3D12CommandProcessor::SetupContext() { if (!CommandProcessor::SetupContext()) { XELOGE("Failed to initialize base command processor context"); return false; } auto context = GetD3D12Context(); auto provider = context->GetD3D12Provider(); auto device = provider->GetDevice(); auto direct_queue = provider->GetDirectQueue(); for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { command_lists_[i] = ui::d3d12::CommandList::Create( device, direct_queue, D3D12_COMMAND_LIST_TYPE_DIRECT); if (command_lists_[i] == nullptr) { XELOGE("Failed to create the command lists"); return false; } } constant_buffer_pool_ = std::make_unique(context, 1024 * 1024); view_heap_pool_ = 
std::make_unique<ui::d3d12::DescriptorHeapPool>(
      context, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 32768);
  // Can't create a shader-visible heap with more than 2048 samplers.
  sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
      context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);

  shared_memory_ = std::make_unique<SharedMemory>(this, memory_);
  if (!shared_memory_->Initialize()) {
    XELOGE("Failed to initialize shared memory");
    return false;
  }

  texture_cache_ = std::make_unique<TextureCache>(this, register_file_,
                                                  shared_memory_.get());
  if (!texture_cache_->Initialize()) {
    XELOGE("Failed to initialize the texture cache");
    return false;
  }

  render_target_cache_ =
      std::make_unique<RenderTargetCache>(this, register_file_);
  if (!render_target_cache_->Initialize()) {
    XELOGE("Failed to initialize the render target cache");
    return false;
  }

  pipeline_cache_ = std::make_unique<PipelineCache>(this, register_file_,
                                                    IsROVUsedForEDRAM());

  primitive_converter_ =
      std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
  if (!primitive_converter_->Initialize()) {
    XELOGE("Failed to initialize the geometric primitive converter");
    return false;
  }

  // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are
  // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it
  // too.
  // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf
  dirty_gamma_ramp_normal_ = true;
  dirty_gamma_ramp_pwl_ = true;
  D3D12_RESOURCE_DESC gamma_ramp_desc;
  gamma_ramp_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D;
  gamma_ramp_desc.Alignment = 0;
  gamma_ramp_desc.Width = 256;
  gamma_ramp_desc.Height = 1;
  gamma_ramp_desc.DepthOrArraySize = 1;
  // Normal gamma is 256x1, PWL gamma is 128x1.
  gamma_ramp_desc.MipLevels = 2;
  gamma_ramp_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
  gamma_ramp_desc.SampleDesc.Count = 1;
  gamma_ramp_desc.SampleDesc.Quality = 0;
  gamma_ramp_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  gamma_ramp_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
  // The first action will be uploading.
  gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
          &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr,
          IID_PPV_ARGS(&gamma_ramp_texture_)))) {
    XELOGE("Failed to create the gamma ramp texture");
    return false;
  }
  // Get the layout for the upload buffer.
  gamma_ramp_desc.DepthOrArraySize = ui::d3d12::D3D12Context::kQueuedFrames;
  UINT64 gamma_ramp_upload_size;
  device->GetCopyableFootprints(
      &gamma_ramp_desc, 0, ui::d3d12::D3D12Context::kQueuedFrames * 2, 0,
      gamma_ramp_footprints_, nullptr, nullptr, &gamma_ramp_upload_size);

  // Create the upload buffer for the gamma ramp.
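  // The upload buffer holds one copy of both ramps (two subresource
  // footprints - the 256-texel normal ramp and the 128-texel PWL ramp) per
  // queued frame, so a ramp written by the guest while an earlier frame is
  // still in flight doesn't overwrite data the GPU may still be copying from.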
ui::d3d12::util::FillBufferResourceDesc( gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&gamma_ramp_upload_)))) { XELOGE("Failed to create the gamma ramp upload buffer"); return false; } if (FAILED(gamma_ramp_upload_->Map( 0, nullptr, reinterpret_cast(&gamma_ramp_upload_mapping_)))) { XELOGE("Failed to map the gamma ramp upload buffer"); return false; } D3D12_RESOURCE_DESC swap_texture_desc; swap_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; swap_texture_desc.Alignment = 0; swap_texture_desc.Width = kSwapTextureWidth; swap_texture_desc.Height = kSwapTextureHeight; swap_texture_desc.DepthOrArraySize = 1; swap_texture_desc.MipLevels = 1; swap_texture_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat; swap_texture_desc.SampleDesc.Count = 1; swap_texture_desc.SampleDesc.Quality = 0; swap_texture_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; swap_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; // Can be sampled at any time, switch to render target when needed, then back. if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &swap_texture_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&swap_texture_)))) { XELOGE("Failed to create the command processor front buffer"); return false; } D3D12_DESCRIPTOR_HEAP_DESC swap_descriptor_heap_desc; swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; swap_descriptor_heap_desc.NumDescriptors = 1; swap_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; swap_descriptor_heap_desc.NodeMask = 0; if (FAILED(device->CreateDescriptorHeap( &swap_descriptor_heap_desc, IID_PPV_ARGS(&swap_texture_rtv_descriptor_heap_)))) { XELOGE("Failed to create the command processor front buffer RTV heap"); return false; } swap_texture_rtv_ = swap_texture_rtv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart(); D3D12_RENDER_TARGET_VIEW_DESC swap_rtv_desc; swap_rtv_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat; swap_rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; swap_rtv_desc.Texture2D.MipSlice = 0; swap_rtv_desc.Texture2D.PlaneSlice = 0; device->CreateRenderTargetView(swap_texture_, &swap_rtv_desc, swap_texture_rtv_); swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; swap_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; if (FAILED(device->CreateDescriptorHeap( &swap_descriptor_heap_desc, IID_PPV_ARGS(&swap_texture_srv_descriptor_heap_)))) { XELOGE("Failed to create the command processor front buffer SRV heap"); return false; } D3D12_SHADER_RESOURCE_VIEW_DESC swap_srv_desc; swap_srv_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat; swap_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; swap_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; swap_srv_desc.Texture2D.MostDetailedMip = 0; swap_srv_desc.Texture2D.MipLevels = 1; swap_srv_desc.Texture2D.PlaneSlice = 0; swap_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView( swap_texture_, &swap_srv_desc, swap_texture_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart()); pix_capture_requested_.store(false, std::memory_order_relaxed); pix_capturing_ = false; // Just not to expose uninitialized memory. 
std::memset(&system_constants_, 0, sizeof(system_constants_)); // Force writing of new format data. std::memset(system_constants_color_formats_, 0xFF, sizeof(system_constants_color_formats_)); return true; } void D3D12CommandProcessor::ShutdownContext() { auto context = GetD3D12Context(); context->AwaitAllFramesCompletion(); ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; for (auto& buffer_for_deletion : buffers_for_deletion_) { buffer_for_deletion.buffer->Release(); } buffers_for_deletion_.clear(); if (swap_texture_srv_descriptor_heap_ != nullptr) { { std::lock_guard lock(swap_state_.mutex); swap_state_.pending = false; swap_state_.front_buffer_texture = 0; } auto graphics_system = static_cast(graphics_system_); graphics_system->AwaitFrontBufferUnused(); swap_texture_srv_descriptor_heap_->Release(); swap_texture_srv_descriptor_heap_ = nullptr; } ui::d3d12::util::ReleaseAndNull(swap_texture_rtv_descriptor_heap_); ui::d3d12::util::ReleaseAndNull(swap_texture_); // Don't need the data anymore, so zero range. if (gamma_ramp_upload_mapping_ != nullptr) { D3D12_RANGE gamma_ramp_written_range; gamma_ramp_written_range.Begin = 0; gamma_ramp_written_range.End = 0; gamma_ramp_upload_->Unmap(0, &gamma_ramp_written_range); gamma_ramp_upload_mapping_ = nullptr; } ui::d3d12::util::ReleaseAndNull(gamma_ramp_upload_); ui::d3d12::util::ReleaseAndNull(gamma_ramp_texture_); sampler_heap_pool_.reset(); view_heap_pool_.reset(); constant_buffer_pool_.reset(); primitive_converter_.reset(); pipeline_cache_.reset(); render_target_cache_.reset(); texture_cache_.reset(); // Root signatured are used by pipelines, thus freed after the pipelines. for (auto it : root_signatures_) { it.second->Release(); } root_signatures_.clear(); shared_memory_.reset(); for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) { command_lists_[i].reset(); } CommandProcessor::ShutdownContext(); } void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { CommandProcessor::WriteRegister(index, value); if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { if (current_queue_frame_ != UINT32_MAX) { uint32_t float_constant_index = (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; if (float_constant_index >= 256) { float_constant_index -= 256; if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { cbuffer_bindings_float_pixel_.up_to_date = false; } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { cbuffer_bindings_float_vertex_.up_to_date = false; } } } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { cbuffer_bindings_bool_loop_.up_to_date = false; } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { cbuffer_bindings_fetch_.up_to_date = false; if (texture_cache_ != nullptr) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); } } else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) { UpdateGammaRampValue(GammaRampType::kPWL, value); } else if (index == XE_GPU_REG_DC_LUT_30_COLOR) { UpdateGammaRampValue(GammaRampType::kNormal, value); } else if (index == XE_GPU_REG_DC_LUT_RW_MODE) { gamma_ramp_rw_subindex_ = 0; } } void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { SCOPE_profile_cpu_f("gpu"); // In 
case the swap command is the only one in the frame. BeginFrame(); auto provider = GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); auto command_list = GetCurrentCommandList(); // Upload the new gamma ramps. if (dirty_gamma_ramp_normal_) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = gamma_ramp_footprints_[current_queue_frame_ * 2]; std::memcpy(gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset, gamma_ramp_.normal, 256 * sizeof(uint32_t)); PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_, D3D12_RESOURCE_STATE_COPY_DEST); gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; SubmitBarriers(); D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; location_source.pResource = gamma_ramp_upload_; location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; location_source.PlacedFootprint = gamma_ramp_footprint; location_dest.pResource = gamma_ramp_texture_; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.SubresourceIndex = 0; command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, nullptr); dirty_gamma_ramp_normal_ = false; } if (dirty_gamma_ramp_pwl_) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = gamma_ramp_footprints_[current_queue_frame_ * 2 + 1]; volatile uint32_t* mapping = reinterpret_cast( gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset); for (uint32_t i = 0; i < 128; ++i) { mapping[i] = (gamma_ramp_.pwl[i].values[0].base >> 6) | (uint32_t(gamma_ramp_.pwl[i].values[1].base >> 6) << 10) | (uint32_t(gamma_ramp_.pwl[i].values[2].base >> 6) << 20); } PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_, D3D12_RESOURCE_STATE_COPY_DEST); gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; SubmitBarriers(); D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; location_source.pResource = gamma_ramp_upload_; location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; location_source.PlacedFootprint = gamma_ramp_footprint; location_dest.pResource = gamma_ramp_texture_; location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; location_dest.SubresourceIndex = 1; command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, nullptr); dirty_gamma_ramp_pwl_ = false; } D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; if (RequestViewDescriptors(0, 2, 2, descriptor_cpu_start, descriptor_gpu_start) != 0) { TextureFormat frontbuffer_format; if (texture_cache_->RequestSwapTexture(descriptor_cpu_start, frontbuffer_format)) { render_target_cache_->UnbindRenderTargets(); // Create the gamma ramp texture descriptor. // This is according to D3D::InitializePresentationParameters from a game // executable, which initializes the normal gamma ramp for 8_8_8_8 output // and the PWL gamma ramp for 2_10_10_10. bool use_pwl_gamma_ramp = frontbuffer_format == TextureFormat::k_2_10_10_10 || frontbuffer_format == TextureFormat::k_2_10_10_10_AS_16_16_16_16; D3D12_SHADER_RESOURCE_VIEW_DESC gamma_ramp_srv_desc; gamma_ramp_srv_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; gamma_ramp_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; gamma_ramp_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; gamma_ramp_srv_desc.Texture1D.MostDetailedMip = use_pwl_gamma_ramp ? 
1 : 0;
      gamma_ramp_srv_desc.Texture1D.MipLevels = 1;
      gamma_ramp_srv_desc.Texture1D.ResourceMinLODClamp = 0.0f;
      device->CreateShaderResourceView(
          gamma_ramp_texture_, &gamma_ramp_srv_desc,
          provider->OffsetViewDescriptor(descriptor_cpu_start, 1));

      // The swap texture is kept as an SRV because the graphics system may
      // draw with it at any time. It's switched to RTV and back when needed.
      PushTransitionBarrier(swap_texture_,
                            D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                            D3D12_RESOURCE_STATE_RENDER_TARGET);
      PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_,
                            D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
      SubmitBarriers();

      // Draw the stretching rectangle.
      command_list->OMSetRenderTargets(1, &swap_texture_rtv_, TRUE, nullptr);
      D3D12_VIEWPORT viewport;
      viewport.TopLeftX = 0.0f;
      viewport.TopLeftY = 0.0f;
      viewport.Width = float(kSwapTextureWidth);
      viewport.Height = float(kSwapTextureHeight);
      viewport.MinDepth = 0.0f;
      viewport.MaxDepth = 0.0f;
      command_list->RSSetViewports(1, &viewport);
      D3D12_RECT scissor;
      scissor.left = 0;
      scissor.top = 0;
      scissor.right = kSwapTextureWidth;
      scissor.bottom = kSwapTextureHeight;
      command_list->RSSetScissorRects(1, &scissor);
      D3D12GraphicsSystem* graphics_system =
          static_cast<D3D12GraphicsSystem*>(graphics_system_);
      D3D12_GPU_DESCRIPTOR_HANDLE gamma_ramp_gpu_handle =
          provider->OffsetViewDescriptor(descriptor_gpu_start, 1);
      graphics_system->StretchTextureToFrontBuffer(
          descriptor_gpu_start, &gamma_ramp_gpu_handle,
          use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f),
          command_list);
      PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_RENDER_TARGET,
                            D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      // Don't care about graphics state because the frame is ending anyway.

      {
        std::lock_guard lock(swap_state_.mutex);
        swap_state_.width = kSwapTextureWidth;
        swap_state_.height = kSwapTextureHeight;
        swap_state_.front_buffer_texture =
            reinterpret_cast<uintptr_t>(swap_texture_srv_descriptor_heap_);
      }
    }
  }

  EndFrame();

  if (cache_clear_requested_) {
    cache_clear_requested_ = false;
    GetD3D12Context()->AwaitAllFramesCompletion();

    ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
    scratch_buffer_size_ = 0;

    sampler_heap_pool_->ClearCache();
    view_heap_pool_->ClearCache();
    constant_buffer_pool_->ClearCache();

    primitive_converter_->ClearCache();
    pipeline_cache_->ClearCache();
    render_target_cache_->ClearCache();
    texture_cache_->ClearCache();

    for (auto it : root_signatures_) {
      it.second->Release();
    }
    root_signatures_.clear();

    // TODO(Triang3l): Shared memory cache clear.
    // shared_memory_->ClearCache();
  }
}

Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
  return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
                                     dword_count);
}

bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
                                      uint32_t index_count,
                                      IndexBufferInfo* index_buffer_info) {
  auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
  auto& regs = *register_file_;

#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  auto enable_mode = static_cast<xenos::ModeControl>(
      regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
  if (enable_mode == xenos::ModeControl::kIgnore) {
    // Ignored.
    return true;
  }
  if (enable_mode == xenos::ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy();
  }

  if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
    // Doesn't actually draw.
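    // (Bits 0:13 of RB_SURFACE_INFO are the surface pitch; a pitch of zero
    // means no EDRAM surface is configured, so rasterization would produce
    // nothing.)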
    return true;
  }

  if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
      primitive_type != PrimitiveType::kPointList &&
      primitive_type != PrimitiveType::kRectangleList) {
    // Both sides are culled - can't reproduce this with rasterizer state.
    return true;
  }

  // Shaders will have already been defined by previous loads.
  // We need them to do just about anything so validate here.
  auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
  auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
  if (!vertex_shader) {
    // Always need a vertex shader.
    return false;
  }
  // Depth-only mode doesn't need a pixel shader.
  if (enable_mode == xenos::ModeControl::kDepth) {
    pixel_shader = nullptr;
  } else if (!pixel_shader) {
    // Need a pixel shader in normal color mode.
    return false;
  }

  // Translate the shaders now to get the color mask, which is needed by the
  // render target cache.
  if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader)) {
    return false;
  }

  uint32_t color_mask = GetCurrentColorMask(pixel_shader);
  uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
  uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
  if (!color_mask && ((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
      (!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) {
    // Not writing to color, depth or stencil, so doesn't draw.
    return true;
  }

  bool new_frame = BeginFrame();
  auto command_list = GetCurrentCommandList();

  // Set up the render targets - this may bind pipelines.
  if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
    // Doesn't actually draw.
    return true;
  }
  const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
      render_target_cache_->GetCurrentPipelineRenderTargets();

  bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;

  // TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex
  // index if it's one beyond the end).
  if (primitive_type == PrimitiveType::kLineLoop && !indexed) {
    return false;
  }

  // Set the primitive topology.
  PrimitiveType primitive_type_converted =
      PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
  D3D_PRIMITIVE_TOPOLOGY primitive_topology;
  switch (primitive_type_converted) {
    case PrimitiveType::kPointList:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
      break;
    case PrimitiveType::kLineList:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
      break;
    case PrimitiveType::kLineStrip:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
      break;
    case PrimitiveType::kTriangleList:
    case PrimitiveType::kRectangleList:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
      break;
    case PrimitiveType::kTriangleStrip:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
      break;
    case PrimitiveType::kQuadList:
      primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
      break;
    default:
      return false;
  }
  if (primitive_topology_ != primitive_topology) {
    primitive_topology_ = primitive_topology;
    command_list->IASetPrimitiveTopology(primitive_topology);
  }

  // Get the pipeline and translate the shaders so used textures are known.
  ID3D12PipelineState* pipeline;
  ID3D12RootSignature* root_signature;
  auto pipeline_status = pipeline_cache_->ConfigurePipeline(
      vertex_shader, pixel_shader, primitive_type_converted,
      indexed ? index_buffer_info->format : IndexFormat::kInt16,
      pipeline_render_targets, &pipeline, &root_signature);
  if (pipeline_status == PipelineCache::UpdateStatus::kError) {
    return false;
  }

  // Update the textures - this may bind pipelines.
texture_cache_->RequestTextures( vertex_shader->GetUsedTextureMask(), pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); // Update viewport, scissor, blend factor and stencil reference. UpdateFixedFunctionState(command_list); // Bind the pipeline. if (current_pipeline_ != pipeline) { GetCurrentCommandList()->SetPipelineState(pipeline); current_pipeline_ = pipeline; } // Update system constants before uploading them. UpdateSystemConstantValues( indexed ? index_buffer_info->endianness : Endian::kUnspecified, pipeline_render_targets); // Update constant buffers, descriptors and root parameters. if (!UpdateBindings(command_list, vertex_shader, pixel_shader, root_signature)) { return false; } // Ensure vertex and index buffers are resident and draw. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture // validity will be tracked. uint64_t vertex_buffers_resident[2] = {}; for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { uint32_t vfetch_index = vertex_binding.fetch_constant; if (vertex_buffers_resident[vfetch_index >> 6] & (1ull << (vfetch_index & 63))) { continue; } uint32_t vfetch_constant_index = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2; if ((regs[vfetch_constant_index].u32 & 0x3) != 3) { XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!", vfetch_index, regs[vfetch_constant_index].u32, regs[vfetch_constant_index + 1].u32); return false; } shared_memory_->RequestRange( regs[vfetch_constant_index].u32 & 0x1FFFFFFC, regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); } if (IsROVUsedForEDRAM()) { render_target_cache_->UseEDRAMAsUAV(); } if (indexed) { uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t); assert_false(index_buffer_info->guest_base & (index_size - 1)); uint32_t index_base = index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1); D3D12_INDEX_BUFFER_VIEW index_buffer_view; index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32 ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; uint32_t converted_index_count; PrimitiveConverter::ConversionResult conversion_result = primitive_converter_->ConvertPrimitives( primitive_type, index_buffer_info->guest_base, index_count, index_buffer_info->format, index_buffer_info->endianness, index_buffer_view.BufferLocation, converted_index_count); if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) { return false; } if (conversion_result == PrimitiveConverter::ConversionResult::kPrimitiveEmpty) { return true; } if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) { index_buffer_view.SizeInBytes = converted_index_count * index_size; index_count = converted_index_count; } else { uint32_t index_buffer_size = index_buffer_info->count * index_size; shared_memory_->RequestRange(index_base, index_buffer_size); index_buffer_view.BufferLocation = shared_memory_->GetGPUAddress() + index_base; index_buffer_view.SizeInBytes = index_buffer_size; } shared_memory_->UseForReading(); command_list->IASetIndexBuffer(&index_buffer_view); SubmitBarriers(); command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0); } else { // Check if need to draw using a conversion index buffer. 
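    // For non-indexed draws of primitive types Direct3D 12 can't rasterize
    // directly (such as triangle fans), the primitive converter provides a
    // prebuilt static 16-bit index buffer that remaps the auto-generated
    // vertex indices; if it returns no address, the topology can be drawn
    // directly with DrawInstanced.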
uint32_t converted_index_count; D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address = primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count, converted_index_count); shared_memory_->UseForReading(); SubmitBarriers(); if (conversion_gpu_address) { D3D12_INDEX_BUFFER_VIEW index_buffer_view; index_buffer_view.BufferLocation = conversion_gpu_address; index_buffer_view.SizeInBytes = converted_index_count * sizeof(uint16_t); index_buffer_view.Format = DXGI_FORMAT_R16_UINT; command_list->IASetIndexBuffer(&index_buffer_view); command_list->DrawIndexedInstanced(converted_index_count, 1, 0, 0, 0); } else { command_list->DrawInstanced(index_count, 1, 0, 0); } } return true; } bool D3D12CommandProcessor::IssueCopy() { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES BeginFrame(); return render_target_cache_->Resolve(shared_memory_.get(), texture_cache_.get(), memory_); } bool D3D12CommandProcessor::BeginFrame() { if (current_queue_frame_ != UINT32_MAX) { return false; } #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES auto context = GetD3D12Context(); auto provider = context->GetD3D12Provider(); context->BeginSwap(); current_queue_frame_ = context->GetCurrentQueueFrame(); // Remove outdated temporary buffers. uint64_t last_completed_frame = context->GetLastCompletedFrame(); auto erase_buffers_end = buffers_for_deletion_.begin(); while (erase_buffers_end != buffers_for_deletion_.end()) { uint64_t upload_frame = erase_buffers_end->last_usage_frame; if (upload_frame > last_completed_frame) { ++erase_buffers_end; break; } erase_buffers_end->buffer->Release(); ++erase_buffers_end; } buffers_for_deletion_.erase(buffers_for_deletion_.begin(), erase_buffers_end); // Reset fixed-function state. ff_viewport_update_needed_ = true; ff_scissor_update_needed_ = true; ff_blend_factor_update_needed_ = true; ff_stencil_ref_update_needed_ = true; // Since a new command list is being started, sample positions are reset to // centers. current_sample_positions_ = MsaaSamples::k1X; // Reset bindings, particularly because the buffers backing them are recycled. 
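  // The upload pages backing the constant buffers are recycled between frames,
  // so GPU addresses and descriptors cached from the previous frame may point
  // into reused memory; root parameter and binding state is therefore treated
  // as fully invalid at the start of each frame.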
current_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; current_view_heap_ = nullptr; current_sampler_heap_ = nullptr; std::memset(current_float_constant_map_vertex_, 0, sizeof(current_float_constant_map_vertex_)); std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); cbuffer_bindings_system_.up_to_date = false; cbuffer_bindings_float_vertex_.up_to_date = false; cbuffer_bindings_float_pixel_.up_to_date = false; cbuffer_bindings_bool_loop_.up_to_date = false; cbuffer_bindings_fetch_.up_to_date = false; draw_view_full_update_ = 0; draw_sampler_full_update_ = 0; texture_bindings_written_vertex_ = false; texture_bindings_written_pixel_ = false; samplers_written_vertex_ = false; samplers_written_pixel_ = false; primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; pix_capturing_ = pix_capture_requested_.exchange(false, std::memory_order_relaxed); if (pix_capturing_) { IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->BeginCapture(); } } command_lists_[current_queue_frame_]->BeginRecording(); constant_buffer_pool_->BeginFrame(); view_heap_pool_->BeginFrame(); sampler_heap_pool_->BeginFrame(); shared_memory_->BeginFrame(); texture_cache_->BeginFrame(); render_target_cache_->BeginFrame(); primitive_converter_->BeginFrame(); return true; } bool D3D12CommandProcessor::EndFrame() { if (current_queue_frame_ == UINT32_MAX) { return false; } assert_false(scratch_buffer_used_); primitive_converter_->EndFrame(); render_target_cache_->EndFrame(); texture_cache_->EndFrame(); shared_memory_->EndFrame(); // Submit barriers now because resources the queued barriers are for may be // destroyed between frames. SubmitBarriers(); command_lists_[current_queue_frame_]->Execute(); if (pix_capturing_) { IDXGraphicsAnalysis* graphics_analysis = GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->EndCapture(); } pix_capturing_ = false; } sampler_heap_pool_->EndFrame(); view_heap_pool_->EndFrame(); constant_buffer_pool_->EndFrame(); auto context = GetD3D12Context(); context->EndSwap(); current_queue_frame_ = UINT32_MAX; return true; } void D3D12CommandProcessor::UpdateFixedFunctionState( ID3D12GraphicsCommandList* command_list) { auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES // Window parameters. // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; if (window_offset_x & 0x4000) { window_offset_x |= 0x8000; } if (window_offset_y & 0x4000) { window_offset_y |= 0x8000; } // Supersampling replacing multisampling due to difficulties of emulating // EDRAM with multisampling. MsaaSamples msaa_samples = MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3); uint32_t ssaa_scale_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; uint32_t ssaa_scale_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; // Viewport. // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. 
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // In games, either all are enabled (for regular drawing) or none are (for // rectangle lists usually). // // If scale/offset is enabled, the Xenos shader is writing (neglecting W // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) // box. If it's not, the position is in screen space. Since we can only use // the NDC in PC APIs, we use a viewport of the largest possible size, and // divide the position by it in translated shaders. uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; float viewport_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) : 1280.0f; float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) : 1280.0f; float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : std::abs(viewport_scale_x); float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : std::abs(viewport_scale_y); float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { viewport_offset_x += float(window_offset_x); viewport_offset_y += float(window_offset_y); } D3D12_VIEWPORT viewport; viewport.TopLeftX = (viewport_offset_x - viewport_scale_x) * float(ssaa_scale_x); viewport.TopLeftY = (viewport_offset_y - viewport_scale_y) * float(ssaa_scale_y); viewport.Width = viewport_scale_x * 2.0f * float(ssaa_scale_x); viewport.Height = viewport_scale_y * 2.0f * float(ssaa_scale_y); viewport.MinDepth = viewport_offset_z; viewport.MaxDepth = viewport_offset_z + viewport_scale_z; if (viewport_scale_z < 0.0f) { // MinDepth > MaxDepth doesn't work on Nvidia, emulating it in vertex // shaders and when applying polygon offset. std::swap(viewport.MinDepth, viewport.MaxDepth); } ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX; ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY; ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width; ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height; ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth; ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth; if (ff_viewport_update_needed_) { ff_viewport_ = viewport; command_list->RSSetViewports(1, &viewport); ff_viewport_update_needed_ = false; } // Scissor. uint32_t pa_sc_window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t pa_sc_window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; D3D12_RECT scissor; scissor.left = pa_sc_window_scissor_tl & 0x7FFF; scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF; scissor.right = pa_sc_window_scissor_br & 0x7FFF; scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF; if (!(pa_sc_window_scissor_tl & (1u << 31))) { // !WINDOW_OFFSET_DISABLE. 
scissor.left = std::max(scissor.left + window_offset_x, LONG(0)); scissor.top = std::max(scissor.top + window_offset_y, LONG(0)); scissor.right = std::max(scissor.right + window_offset_x, LONG(0)); scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0)); } scissor.left *= ssaa_scale_x; scissor.top *= ssaa_scale_y; scissor.right *= ssaa_scale_x; scissor.bottom *= ssaa_scale_y; ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left; ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top; ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right; ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom; if (ff_scissor_update_needed_) { ff_scissor_ = scissor; command_list->RSSetScissorRects(1, &scissor); ff_scissor_update_needed_ = false; } if (!IsROVUsedForEDRAM()) { // Blend factor. ff_blend_factor_update_needed_ |= ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; ff_blend_factor_update_needed_ |= ff_blend_factor_[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32; ff_blend_factor_update_needed_ |= ff_blend_factor_[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32; ff_blend_factor_update_needed_ |= ff_blend_factor_[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; if (ff_blend_factor_update_needed_) { ff_blend_factor_[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32; ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32; ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32; ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; command_list->OMSetBlendFactor(ff_blend_factor_); ff_blend_factor_update_needed_ = false; } // Stencil reference value. uint32_t stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF; ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; if (ff_stencil_ref_update_needed_) { ff_stencil_ref_ = stencil_ref; command_list->OMSetStencilRef(stencil_ref); ff_stencil_ref_update_needed_ = false; } } } void D3D12CommandProcessor::UpdateSystemConstantValues( Endian index_endian, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32; uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32; uint32_t pa_su_point_minmax = regs[XE_GPU_REG_PA_SU_POINT_MINMAX].u32; uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; uint32_t rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].u32; bool dirty = false; // Flags. uint32_t flags = 0; // W0 division control. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. // = false: multiply the X, Y coordinates by 1/W0. // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. // = false: multiply the Z coordinate by 1/W0. // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal // to get 1/W0. 
if (pa_cl_vte_cntl & (1 << 8)) { flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW; } if (pa_cl_vte_cntl & (1 << 9)) { flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW; } if (pa_cl_vte_cntl & (1 << 10)) { flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal; } // Reversed depth. if ((pa_cl_vte_cntl & (1 << 4)) && regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 < 0.0f) { flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; } // Gamma writing. if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) == uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma; } if (((regs[XE_GPU_REG_RB_COLOR1_INFO].u32 >> 16) & 0xF) == uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { flags |= DxbcShaderTranslator::kSysFlag_Color1Gamma; } if (((regs[XE_GPU_REG_RB_COLOR2_INFO].u32 >> 16) & 0xF) == uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { flags |= DxbcShaderTranslator::kSysFlag_Color2Gamma; } if (((regs[XE_GPU_REG_RB_COLOR3_INFO].u32 >> 16) & 0xF) == uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { flags |= DxbcShaderTranslator::kSysFlag_Color3Gamma; } if (IsROVUsedForEDRAM()) { if (rb_depthcontrol & (0x1 | 0x2)) { flags |= DxbcShaderTranslator::kSysFlag_DepthStencil; if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == DepthRenderTargetFormat::kD24FS8) { flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24; } if (rb_depthcontrol & 0x2) { flags |= ((rb_depthcontrol >> 4) & 0x7) << DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift; if (rb_depthcontrol & 0x4) { flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask | DxbcShaderTranslator::kSysFlag_DepthStencilWrite; } } else { // In case stencil is used without depth testing - always pass, and // don't modify the stored depth. flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess | DxbcShaderTranslator::kSysFlag_DepthPassIfEqual | DxbcShaderTranslator::kSysFlag_DepthPassIfGreater; } if (rb_depthcontrol & 0x1) { flags |= DxbcShaderTranslator::kSysFlag_StencilTest; if (rb_stencilrefmask & (0xFF << 16)) { flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite; } } } } dirty |= system_constants_.flags != flags; system_constants_.flags = flags; // Vertex index offset. dirty |= system_constants_.vertex_base_index != vgt_indx_offset; system_constants_.vertex_base_index = vgt_indx_offset; // Index buffer endianness. dirty |= system_constants_.vertex_index_endian != uint32_t(index_endian); system_constants_.vertex_index_endian = uint32_t(index_endian); // Conversion to Direct3D 12 normalized device coordinates. // See viewport configuration in UpdateFixedFunctionState for explanations. // X and Y scale/offset is to convert unnormalized coordinates generated by // shaders (for rectangle list drawing, for instance) to the 2560x2560 // viewport that is used to emulate unnormalized coordinates. // Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed. // Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules. // TODO(Triang3l): Check if pixel coordinates need to be offset depending on a // different register (and if there's such register at all). float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; bool gl_clip_space_def = !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); float ndc_scale_x, ndc_scale_y; if (pa_cl_vte_cntl & (1 << 0)) { ndc_scale_x = viewport_scale_x >= 0.0f ? 
1.0f : -1.0f; } else { ndc_scale_x = 1.0f / 1280.0f; } if (pa_cl_vte_cntl & (1 << 2)) { ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; } else { ndc_scale_y = -1.0f / 1280.0f; } float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; float pixel_half_pixel_offset = 0.0f; if (FLAGS_d3d12_half_pixel_offset && !(pa_su_vtx_cntl & (1 << 0))) { // Signs are hopefully correct here, tested in GTA IV on both clearing // (without a viewport) and drawing things near the edges of the screen. if (pa_cl_vte_cntl & (1 << 0)) { if (viewport_scale_x != 0.0f) { ndc_offset_x += 0.5f / viewport_scale_x; } } else { ndc_offset_x += 1.0f / 2560.0f; } if (pa_cl_vte_cntl & (1 << 2)) { if (viewport_scale_y != 0.0f) { ndc_offset_y += 0.5f / viewport_scale_y; } } else { ndc_offset_y -= 1.0f / 2560.0f; } pixel_half_pixel_offset = -0.5f; } dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; dirty |= system_constants_.pixel_half_pixel_offset != pixel_half_pixel_offset; system_constants_.ndc_scale[0] = ndc_scale_x; system_constants_.ndc_scale[1] = ndc_scale_y; system_constants_.ndc_scale[2] = ndc_scale_z; system_constants_.ndc_offset[0] = ndc_offset_x; system_constants_.ndc_offset[1] = ndc_offset_y; system_constants_.ndc_offset[2] = ndc_offset_z; system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; // Point size. float point_size_x = float(pa_su_point_size >> 16) * 0.125f; float point_size_y = float(pa_su_point_size & 0xFFFF) * 0.125f; float point_size_min = float(pa_su_point_minmax & 0xFFFF) * 0.125f; float point_size_max = float(pa_su_point_minmax >> 16) * 0.125f; dirty |= system_constants_.point_size[0] != point_size_x; dirty |= system_constants_.point_size[1] != point_size_y; dirty |= system_constants_.point_size_min_max[0] != point_size_min; dirty |= system_constants_.point_size_min_max[1] != point_size_max; system_constants_.point_size[0] = point_size_x; system_constants_.point_size[1] = point_size_y; system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[1] = point_size_max; float point_screen_to_ndc_x, point_screen_to_ndc_y; if (pa_cl_vte_cntl & (1 << 0)) { point_screen_to_ndc_x = (viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f; } else { point_screen_to_ndc_x = 1.0f / 2560.0f; } if (pa_cl_vte_cntl & (1 << 2)) { point_screen_to_ndc_y = (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; } else { point_screen_to_ndc_y = -1.0f / 2560.0f; } dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x; dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y; system_constants_.point_screen_to_ndc[0] = point_screen_to_ndc_x; system_constants_.point_screen_to_ndc[1] = point_screen_to_ndc_y; // Pixel position register. uint32_t pixel_pos_reg = (sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg; system_constants_.pixel_pos_reg = pixel_pos_reg; // Supersampling anti-aliasing pixel scale inverse for pixel positions. 
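// MSAA is emulated as supersampling here (2x as 1x2 and 4x as 2x2 host pixels
// per guest pixel, judging by the scales below), so the host pixel position
// is presumably multiplied by these inverses in the translated pixel shader
// before it is exposed through the pixel position register.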
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); float ssaa_inv_scale_x = msaa_samples >= MsaaSamples::k4X ? 0.5f : 1.0f; float ssaa_inv_scale_y = msaa_samples >= MsaaSamples::k2X ? 0.5f : 1.0f; dirty |= system_constants_.ssaa_inv_scale[0] != ssaa_inv_scale_x; dirty |= system_constants_.ssaa_inv_scale[1] != ssaa_inv_scale_y; system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x; system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y; // Alpha test. int32_t alpha_test; if (rb_colorcontrol & 0x8) { uint32_t alpha_test_function = rb_colorcontrol & 0x7; // 0: Never - fail in [-inf, +inf]. // 1: Less - fail in [ref, +inf]. // 2: Equal - pass in [ref, ref]. // 3: Less or equal - pass in [-inf, ref]. // 4: Greater - fail in [-inf, ref]. // 5: Not equal - fail in [ref, ref]. // 6: Greater or equal - pass in [ref, +inf]. // 7: Always - pass in [-inf, +inf]. alpha_test = (alpha_test_function & 0x2) ? 1 : -1; uint32_t alpha_test_range_start = (alpha_test_function == 1 || alpha_test_function == 2 || alpha_test_function == 5 || alpha_test_function == 6) ? rb_alpha_ref : 0xFF800000u; uint32_t alpha_test_range_end = (alpha_test_function == 2 || alpha_test_function == 3 || alpha_test_function == 4 || alpha_test_function == 5) ? rb_alpha_ref : 0x7F800000u; dirty |= system_constants_.alpha_test_range[0] != alpha_test_range_start; dirty |= system_constants_.alpha_test_range[1] != alpha_test_range_end; system_constants_.alpha_test_range[0] = alpha_test_range_start; system_constants_.alpha_test_range[1] = alpha_test_range_end; } else { alpha_test = 0; } dirty |= system_constants_.alpha_test != alpha_test; system_constants_.alpha_test = alpha_test; // Color exponent bias and output index mapping or ROV writing. uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; bool colorcontrol_blend_enable = (regs[XE_GPU_REG_RB_COLORCONTROL].u32 & 0x20) == 0; for (uint32_t i = 0; i < 4; ++i) { uint32_t color_info, blend_control; switch (i) { case 1: color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32; break; case 2: color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32; break; case 3: color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32; break; default: color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32; } // Exponent bias is in bits 20:25 of RB_COLOR_INFO. int32_t color_exp_bias = int32_t(color_info << 6) >> 26; ColorRenderTargetFormat color_format = RenderTargetCache::GetBaseColorFormat( ColorRenderTargetFormat((color_info >> 16) & 0xF)); if (color_format == ColorRenderTargetFormat::k_16_16 || color_format == ColorRenderTargetFormat::k_16_16_16_16) { // On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have // -32...32 range and expect shaders to give -32...32 values, but they're // emulated using normalized RG16/RGBA16 when not using the ROV, so the // value returned from the shader needs to be divided by 32 (blending will // be incorrect in this case, but there's no other way without using ROV). 
      // http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
      if (!IsROVUsedForEDRAM()) {
        color_exp_bias -= 5;
      }
    }
    float color_exp_bias_scale;
    *reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
        0x3F800000 + (color_exp_bias << 23);
    dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
    system_constants_.color_exp_bias[i] = color_exp_bias_scale;
    if (IsROVUsedForEDRAM()) {
      uint32_t edram_base_dwords = (color_info & 0xFFF) * 1280;
      dirty |= system_constants_.edram_base_dwords[i] != edram_base_dwords;
      system_constants_.edram_base_dwords[i] = edram_base_dwords;
      uint32_t edram_pitch_tiles =
          ((std::min(rb_surface_info & 0x3FFFu, 2560u) *
            (msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
           79) /
          80;
      dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
      system_constants_.edram_pitch_tiles = edram_pitch_tiles;
      uint32_t rt_flags =
          DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
      // Exclude unused components from the write mask.
      uint32_t rt_mask =
          (rb_color_mask >> (i * 4)) & 0xF &
          ~(rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift);
      if (rt_mask != 0) {
        rt_flags |= rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift;
        uint32_t blend_x, blend_y;
        if (colorcontrol_blend_enable &&
            DxbcShaderTranslator::GetBlendConstants(blend_control, blend_x,
                                                    blend_y)) {
          rt_flags |= DxbcShaderTranslator::kRTFlag_Blend;
          uint32_t rt_pair_index = i >> 1;
          uint32_t rt_pair_comp = (i & 1) << 1;
          if (system_constants_
                  .edram_blend_rt01_rt23[rt_pair_index][rt_pair_comp] !=
              blend_x) {
            dirty = true;
            system_constants_
                .edram_blend_rt01_rt23[rt_pair_index][rt_pair_comp] = blend_x;
          }
          if (system_constants_
                  .edram_blend_rt01_rt23[rt_pair_index][rt_pair_comp + 1] !=
              blend_y) {
            dirty = true;
            system_constants_
                .edram_blend_rt01_rt23[rt_pair_index][rt_pair_comp + 1] =
                blend_y;
          }
        }
      }
      dirty |= system_constants_.edram_rt_flags[i] != rt_flags;
      system_constants_.edram_rt_flags[i] = rt_flags;
      if (system_constants_color_formats_[i] != color_format) {
        dirty = true;
        DxbcShaderTranslator::SetColorFormatSystemConstants(system_constants_,
                                                            i, color_format);
        system_constants_color_formats_[i] = color_format;
      }
    } else {
      dirty |= system_constants_.color_output_map[i] !=
               render_targets[i].guest_render_target;
      system_constants_.color_output_map[i] =
          render_targets[i].guest_render_target;
    }
  }

  // Depth/stencil testing and blend constant for ROV blending.
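// RB_DEPTHCONTROL layout as decoded in this function: bit 0 = stencil enable,
// bit 1 = depth test enable, bit 2 = depth write enable, bits 4:6 = depth
// function, bit 7 = backface stencil enable, bits 8:10/11:13/14:16/17:19 =
// front stencil function/fail op/pass op/depth-fail op, and bits
// 20:22/23:25/26:28/29:31 = the same fields for back faces.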
if (IsROVUsedForEDRAM()) { uint32_t depth_base_dwords = (regs[XE_GPU_REG_RB_DEPTH_INFO].u32 & 0xFFF) * 1280; dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; if (rb_depthcontrol & 0x1) { uint32_t stencil_value; stencil_value = rb_stencilrefmask & 0xFF; dirty |= system_constants_.edram_stencil_reference != stencil_value; system_constants_.edram_stencil_reference = stencil_value; stencil_value = (rb_stencilrefmask >> 8) & 0xFF; dirty |= system_constants_.edram_stencil_read_mask != stencil_value; system_constants_.edram_stencil_read_mask = stencil_value; stencil_value = (rb_stencilrefmask >> 16) & 0xFF; dirty |= system_constants_.edram_stencil_write_mask != stencil_value; system_constants_.edram_stencil_write_mask = stencil_value; static const uint32_t kStencilOpMap[] = { DxbcShaderTranslator::kStencilOp_Keep, DxbcShaderTranslator::kStencilOp_Zero, DxbcShaderTranslator::kStencilOp_Replace, DxbcShaderTranslator::kStencilOp_IncrementSaturate, DxbcShaderTranslator::kStencilOp_DecrementSaturate, DxbcShaderTranslator::kStencilOp_Invert, DxbcShaderTranslator::kStencilOp_Increment, DxbcShaderTranslator::kStencilOp_Decrement, }; stencil_value = kStencilOpMap[(rb_depthcontrol >> 11) & 0x7]; dirty |= system_constants_.edram_stencil_front_fail != stencil_value; system_constants_.edram_stencil_front_fail = stencil_value; stencil_value = kStencilOpMap[(rb_depthcontrol >> 17) & 0x7]; dirty |= system_constants_.edram_stencil_front_depth_fail != stencil_value; system_constants_.edram_stencil_front_depth_fail = stencil_value; stencil_value = kStencilOpMap[(rb_depthcontrol >> 14) & 0x7]; dirty |= system_constants_.edram_stencil_front_pass != stencil_value; system_constants_.edram_stencil_front_pass = stencil_value; stencil_value = (rb_depthcontrol >> 8) & 0x7; dirty |= system_constants_.edram_stencil_front_comparison != stencil_value; system_constants_.edram_stencil_front_comparison = stencil_value; if (rb_depthcontrol & 0x80) { stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7]; dirty |= system_constants_.edram_stencil_back_fail != stencil_value; system_constants_.edram_stencil_back_fail = stencil_value; stencil_value = kStencilOpMap[(rb_depthcontrol >> 29) & 0x7]; dirty |= system_constants_.edram_stencil_back_depth_fail != stencil_value; system_constants_.edram_stencil_back_depth_fail = stencil_value; stencil_value = kStencilOpMap[(rb_depthcontrol >> 26) & 0x7]; dirty |= system_constants_.edram_stencil_back_pass != stencil_value; system_constants_.edram_stencil_back_pass = stencil_value; stencil_value = (rb_depthcontrol >> 20) & 0x7; dirty |= system_constants_.edram_stencil_back_comparison != stencil_value; system_constants_.edram_stencil_back_comparison = stencil_value; } else { dirty |= std::memcmp(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, 4 * sizeof(uint32_t)) != 0; std::memcpy(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, 4 * sizeof(uint32_t)); } } dirty |= system_constants_.edram_blend_constant[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; system_constants_.edram_blend_constant[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32; dirty |= system_constants_.edram_blend_constant[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32; system_constants_.edram_blend_constant[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32; dirty |= system_constants_.edram_blend_constant[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32; system_constants_.edram_blend_constant[2] = 
regs[XE_GPU_REG_RB_BLEND_BLUE].f32; dirty |= system_constants_.edram_blend_constant[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; system_constants_.edram_blend_constant[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; } cbuffer_bindings_system_.up_to_date &= !dirty; } bool D3D12CommandProcessor::UpdateBindings( ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) { auto provider = GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES // Bind the new root signature. if (current_graphics_root_signature_ != root_signature) { current_graphics_root_signature_ = root_signature; GetRootExtraParameterIndices(vertex_shader, pixel_shader, current_graphics_root_extras_); // We don't know which root parameters are up to date anymore. current_graphics_root_up_to_date_ = 0; command_list->SetGraphicsRootSignature(root_signature); } XXH64_state_t hash_state; // Get textures and samplers used by the vertex shader. uint32_t texture_count_vertex, sampler_count_vertex; const D3D12Shader::TextureSRV* textures_vertex = vertex_shader->GetTextureSRVs(texture_count_vertex); uint64_t texture_bindings_hash_vertex = texture_count_vertex != 0 ? texture_cache_->GetDescriptorHashForActiveTextures( textures_vertex, texture_count_vertex) : 0; const D3D12Shader::SamplerBinding* samplers_vertex = vertex_shader->GetSamplerBindings(sampler_count_vertex); XXH64_reset(&hash_state, 0); for (uint32_t i = 0; i < sampler_count_vertex; ++i) { TextureCache::SamplerParameters sampler_parameters = texture_cache_->GetSamplerParameters(samplers_vertex[i]); XXH64_update(&hash_state, &sampler_parameters, sizeof(sampler_parameters)); } uint64_t samplers_hash_vertex = XXH64_digest(&hash_state); // Get textures and samplers used by the pixel shader. uint32_t texture_count_pixel, sampler_count_pixel; const D3D12Shader::TextureSRV* textures_pixel; const D3D12Shader::SamplerBinding* samplers_pixel; if (pixel_shader != nullptr) { textures_pixel = pixel_shader->GetTextureSRVs(texture_count_pixel); samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel); } else { textures_pixel = nullptr; texture_count_pixel = 0; samplers_pixel = nullptr; sampler_count_pixel = 0; } uint64_t texture_bindings_hash_pixel = texture_count_pixel != 0 ? texture_cache_->GetDescriptorHashForActiveTextures( textures_pixel, texture_count_pixel) : 0; XXH64_reset(&hash_state, 0); for (uint32_t i = 0; i < sampler_count_pixel; ++i) { TextureCache::SamplerParameters sampler_parameters = texture_cache_->GetSamplerParameters(samplers_pixel[i]); XXH64_update(&hash_state, &sampler_parameters, sizeof(sampler_parameters)); } uint64_t samplers_hash_pixel = XXH64_digest(&hash_state); // Begin updating descriptors. 
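// Descriptor writes are skipped when nothing has changed: texture bindings
// and sampler parameters are reduced to the XXH64 hashes above, and the
// write_* flags below are only raised when a hash differs from the one the
// currently written descriptors were built from (or when a switch to a new
// descriptor heap forces a full rewrite further down).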
bool write_system_constant_view = false; bool write_float_constant_view_vertex = false; bool write_float_constant_view_pixel = false; bool write_bool_loop_constant_view = false; bool write_fetch_constant_view = false; bool write_textures_vertex = texture_count_vertex != 0 && (!texture_bindings_written_vertex_ || current_texture_bindings_hash_vertex_ != texture_bindings_hash_vertex); bool write_textures_pixel = texture_count_pixel != 0 && (!texture_bindings_written_pixel_ || current_texture_bindings_hash_pixel_ != texture_bindings_hash_pixel); bool write_samplers_vertex = sampler_count_vertex != 0 && (!samplers_written_vertex_ || current_samplers_hash_vertex_ != samplers_hash_vertex); bool write_samplers_pixel = sampler_count_pixel != 0 && (!samplers_written_pixel_ || current_samplers_hash_pixel_ != samplers_hash_pixel); // Check if the float constant layout is still the same and get the counts. const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count; // Even if the shader doesn't need any float constants, a valid binding must // still be provided, so if the first draw in the frame with the current root // signature doesn't have float constants at all, still allocate an empty // buffer. uint32_t float_constant_size_vertex = xe::align( uint32_t(std::max(float_constant_count_vertex, 1u) * 4 * sizeof(float)), 256u); for (uint32_t i = 0; i < 4; ++i) { if (current_float_constant_map_vertex_[i] != float_constant_map_vertex.float_bitmap[i]) { current_float_constant_map_vertex_[i] = float_constant_map_vertex.float_bitmap[i]; // If no float constants at all, we can reuse any buffer for them, so not // invalidating. if (float_constant_map_vertex.float_count != 0) { cbuffer_bindings_float_vertex_.up_to_date = false; } } } uint32_t float_constant_count_pixel = 0; if (pixel_shader != nullptr) { const Shader::ConstantRegisterMap& float_constant_map_pixel = pixel_shader->constant_register_map(); float_constant_count_pixel = float_constant_map_pixel.float_count; for (uint32_t i = 0; i < 4; ++i) { if (current_float_constant_map_pixel_[i] != float_constant_map_pixel.float_bitmap[i]) { current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; if (float_constant_map_pixel.float_count != 0) { cbuffer_bindings_float_pixel_.up_to_date = false; } } } } else { std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); } uint32_t float_constant_size_pixel = xe::align( uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)), 256u); // Update constant buffers. 
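// Direct3D 12 constant buffer views must have a size that is a multiple of
// 256 bytes, hence the xe::align(..., 256u) above and below; for example, a
// shader using 20 float4 constants needs 20 * 16 = 320 bytes of data, padded
// to a 512-byte view.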
  if (!cbuffer_bindings_system_.up_to_date) {
    uint8_t* system_constants = constant_buffer_pool_->RequestFull(
        xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr,
        &cbuffer_bindings_system_.buffer_address);
    if (system_constants == nullptr) {
      return false;
    }
    std::memcpy(system_constants, &system_constants_,
                sizeof(system_constants_));
    cbuffer_bindings_system_.up_to_date = true;
    write_system_constant_view = true;
  }
  if (!cbuffer_bindings_float_vertex_.up_to_date) {
    uint8_t* float_constants = constant_buffer_pool_->RequestFull(
        float_constant_size_vertex, nullptr, nullptr,
        &cbuffer_bindings_float_vertex_.buffer_address);
    if (float_constants == nullptr) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry =
          float_constant_map_vertex.float_bitmap[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(float_constants,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    4 * sizeof(float));
        float_constants += 4 * sizeof(float);
      }
    }
    cbuffer_bindings_float_vertex_.up_to_date = true;
    write_float_constant_view_vertex = true;
  }
  if (!cbuffer_bindings_float_pixel_.up_to_date) {
    uint8_t* float_constants = constant_buffer_pool_->RequestFull(
        float_constant_size_pixel, nullptr, nullptr,
        &cbuffer_bindings_float_pixel_.buffer_address);
    if (float_constants == nullptr) {
      return false;
    }
    if (pixel_shader != nullptr) {
      const Shader::ConstantRegisterMap& float_constant_map_pixel =
          pixel_shader->constant_register_map();
      for (uint32_t i = 0; i < 4; ++i) {
        uint64_t float_constant_map_entry =
            float_constant_map_pixel.float_bitmap[i];
        uint32_t float_constant_index;
        while (xe::bit_scan_forward(float_constant_map_entry,
                                    &float_constant_index)) {
          float_constant_map_entry &= ~(1ull << float_constant_index);
          std::memcpy(float_constants,
                      &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                            (float_constant_index << 2)]
                           .f32,
                      4 * sizeof(float));
          float_constants += 4 * sizeof(float);
        }
      }
    }
    cbuffer_bindings_float_pixel_.up_to_date = true;
    write_float_constant_view_pixel = true;
  }
  if (!cbuffer_bindings_bool_loop_.up_to_date) {
    uint32_t* bool_loop_constants =
        reinterpret_cast<uint32_t*>(constant_buffer_pool_->RequestFull(
            768, nullptr, nullptr,
            &cbuffer_bindings_bool_loop_.buffer_address));
    if (bool_loop_constants == nullptr) {
      return false;
    }
    // Bool and loop constants are quadrupled to allow dynamic indexing.
    for (uint32_t i = 0; i < 40; ++i) {
      uint32_t bool_loop_constant =
          regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32;
      uint32_t* bool_loop_constant_vector = bool_loop_constants + (i << 2);
      bool_loop_constant_vector[0] = bool_loop_constant;
      bool_loop_constant_vector[1] = bool_loop_constant;
      bool_loop_constant_vector[2] = bool_loop_constant;
      bool_loop_constant_vector[3] = bool_loop_constant;
    }
    cbuffer_bindings_bool_loop_.up_to_date = true;
    write_bool_loop_constant_view = true;
  }
  if (!cbuffer_bindings_fetch_.up_to_date) {
    uint8_t* fetch_constants = constant_buffer_pool_->RequestFull(
        768, nullptr, nullptr, &cbuffer_bindings_fetch_.buffer_address);
    if (fetch_constants == nullptr) {
      return false;
    }
    std::memcpy(fetch_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
                32 * 6 * sizeof(uint32_t));
    cbuffer_bindings_fetch_.up_to_date = true;
    write_fetch_constant_view = true;
  }

  // Allocate the descriptors.
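// Two descriptor counts are computed: a partial update covering only the
// views that need rewriting, and a full update covering all five constant
// buffers, the shared memory SRV, the EDRAM UAV when the ROV path is used,
// and every texture SRV - the pool may have to switch to a new heap, in which
// case the returned full-update index changes and everything is rewritten
// below.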
uint32_t view_count_partial_update = 0; if (write_system_constant_view) { ++view_count_partial_update; } if (write_float_constant_view_vertex) { ++view_count_partial_update; } if (write_float_constant_view_pixel) { ++view_count_partial_update; } if (write_bool_loop_constant_view) { ++view_count_partial_update; } if (write_fetch_constant_view) { ++view_count_partial_update; } if (write_textures_vertex) { view_count_partial_update += texture_count_vertex; } if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } // All the constants + shared memory + textures. uint32_t view_count_full_update = 6 + texture_count_vertex + texture_count_pixel; if (IsROVUsedForEDRAM()) { // + EDRAM UAV. ++view_count_full_update; } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; uint32_t descriptor_size_view = provider->GetViewDescriptorSize(); uint64_t view_full_update_index = RequestViewDescriptors( draw_view_full_update_, view_count_partial_update, view_count_full_update, view_cpu_handle, view_gpu_handle); if (view_full_update_index == 0) { XELOGE("Failed to allocate view descriptors!"); return false; } uint32_t sampler_count_partial_update = 0; if (write_samplers_vertex) { sampler_count_partial_update += sampler_count_vertex; } if (write_samplers_pixel) { sampler_count_partial_update += sampler_count_pixel; } D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {}; D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; uint32_t descriptor_size_sampler = provider->GetSamplerDescriptorSize(); uint64_t sampler_full_update_index = 0; if (sampler_count_vertex != 0 || sampler_count_pixel != 0) { sampler_full_update_index = RequestSamplerDescriptors( draw_sampler_full_update_, sampler_count_partial_update, sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle, sampler_gpu_handle); if (sampler_full_update_index == 0) { XELOGE("Failed to allocate sampler descriptors!"); return false; } } if (draw_view_full_update_ != view_full_update_index) { // Need to update all view descriptors. write_system_constant_view = true; write_fetch_constant_view = true; write_float_constant_view_vertex = true; write_float_constant_view_pixel = true; write_bool_loop_constant_view = true; write_textures_vertex = texture_count_vertex != 0; write_textures_pixel = texture_count_pixel != 0; texture_bindings_written_vertex_ = false; texture_bindings_written_pixel_ = false; // If updating fully, write the shared memory descriptor (t0) and, if // needed, the EDRAM descriptor (u0). shared_memory_->CreateSRV(view_cpu_handle); gpu_handle_shared_memory_and_edram_ = view_gpu_handle; view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; if (IsROVUsedForEDRAM()) { render_target_cache_->CreateEDRAMUint32UAV(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SharedMemoryAndEDRAM); } if (draw_sampler_full_update_ != sampler_full_update_index) { write_samplers_vertex = sampler_count_vertex != 0; write_samplers_pixel = sampler_count_pixel != 0; samplers_written_vertex_ = false; samplers_written_pixel_ = false; } // Write the descriptors. 
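// Each descriptor write below follows the same pattern: remember the GPU
// handle for the root parameter, create the view or sampler at the current
// CPU handle, advance both handles by the descriptor increment, and clear the
// matching bit in current_graphics_root_up_to_date_ so the table is rebound
// before drawing.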
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_desc; if (write_system_constant_view) { gpu_handle_system_constants_ = view_gpu_handle; constant_buffer_desc.BufferLocation = cbuffer_bindings_system_.buffer_address; constant_buffer_desc.SizeInBytes = xe::align(uint32_t(sizeof(system_constants_)), 256u); device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SystemConstants); } if (write_float_constant_view_vertex) { gpu_handle_float_constants_vertex_ = view_gpu_handle; constant_buffer_desc.BufferLocation = cbuffer_bindings_float_vertex_.buffer_address; constant_buffer_desc.SizeInBytes = float_constant_size_vertex; device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FloatConstantsVertex); } if (write_float_constant_view_pixel) { gpu_handle_float_constants_pixel_ = view_gpu_handle; constant_buffer_desc.BufferLocation = cbuffer_bindings_float_pixel_.buffer_address; constant_buffer_desc.SizeInBytes = float_constant_size_pixel; device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FloatConstantsPixel); } if (write_bool_loop_constant_view) { gpu_handle_bool_loop_constants_ = view_gpu_handle; constant_buffer_desc.BufferLocation = cbuffer_bindings_bool_loop_.buffer_address; constant_buffer_desc.SizeInBytes = 768; device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_BoolLoopConstants); } if (write_fetch_constant_view) { gpu_handle_fetch_constants_ = view_gpu_handle; constant_buffer_desc.BufferLocation = cbuffer_bindings_fetch_.buffer_address; constant_buffer_desc.SizeInBytes = 768; device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants); } if (write_textures_vertex) { assert_true(current_graphics_root_extras_.textures_vertex != RootExtraParameterIndices::kUnavailable); gpu_handle_textures_vertex_ = view_gpu_handle; for (uint32_t i = 0; i < texture_count_vertex; ++i) { const D3D12Shader::TextureSRV& srv = textures_vertex[i]; texture_cache_->WriteTextureSRV(srv, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } texture_bindings_written_vertex_ = true; current_texture_bindings_hash_vertex_ = texture_bindings_hash_vertex; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_extras_.textures_vertex); } if (write_textures_pixel) { assert_true(current_graphics_root_extras_.textures_pixel != RootExtraParameterIndices::kUnavailable); gpu_handle_textures_pixel_ = view_gpu_handle; for (uint32_t i = 0; i < texture_count_pixel; ++i) { const D3D12Shader::TextureSRV& srv = textures_pixel[i]; texture_cache_->WriteTextureSRV(srv, view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } texture_bindings_written_pixel_ = true; 
current_texture_bindings_hash_pixel_ = texture_bindings_hash_pixel; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_extras_.textures_pixel); } if (write_samplers_vertex) { assert_true(current_graphics_root_extras_.samplers_vertex != RootExtraParameterIndices::kUnavailable); gpu_handle_samplers_vertex_ = sampler_gpu_handle; for (uint32_t i = 0; i < sampler_count_vertex; ++i) { texture_cache_->WriteSampler( texture_cache_->GetSamplerParameters(samplers_vertex[i]), sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } samplers_written_vertex_ = true; current_samplers_hash_vertex_ = samplers_hash_vertex; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_extras_.samplers_vertex); } if (write_samplers_pixel) { assert_true(current_graphics_root_extras_.samplers_pixel != RootExtraParameterIndices::kUnavailable); gpu_handle_samplers_pixel_ = sampler_gpu_handle; for (uint32_t i = 0; i < sampler_count_pixel; ++i) { texture_cache_->WriteSampler( texture_cache_->GetSamplerParameters(samplers_pixel[i]), sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } samplers_written_pixel_ = true; current_samplers_hash_pixel_ = samplers_hash_pixel; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_extras_.samplers_pixel); } // Wrote new descriptors on the current page. draw_view_full_update_ = view_full_update_index; draw_sampler_full_update_ = sampler_full_update_index; // Update the root parameters. if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_FetchConstants))) { command_list->SetGraphicsRootDescriptorTable(kRootParameter_FetchConstants, gpu_handle_fetch_constants_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_FloatConstantsVertex))) { command_list->SetGraphicsRootDescriptorTable( kRootParameter_FloatConstantsVertex, gpu_handle_float_constants_vertex_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_FloatConstantsVertex; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_FloatConstantsPixel))) { command_list->SetGraphicsRootDescriptorTable( kRootParameter_FloatConstantsPixel, gpu_handle_float_constants_pixel_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_FloatConstantsPixel; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_SystemConstants))) { command_list->SetGraphicsRootDescriptorTable(kRootParameter_SystemConstants, gpu_handle_system_constants_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_SystemConstants; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_BoolLoopConstants))) { command_list->SetGraphicsRootDescriptorTable( kRootParameter_BoolLoopConstants, gpu_handle_bool_loop_constants_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_BoolLoopConstants; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_SharedMemoryAndEDRAM))) { command_list->SetGraphicsRootDescriptorTable( kRootParameter_SharedMemoryAndEDRAM, gpu_handle_shared_memory_and_edram_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_SharedMemoryAndEDRAM; } uint32_t extra_index; extra_index = current_graphics_root_extras_.textures_pixel; if (extra_index != RootExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { command_list->SetGraphicsRootDescriptorTable(extra_index, 
gpu_handle_textures_pixel_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_extras_.samplers_pixel; if (extra_index != RootExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { command_list->SetGraphicsRootDescriptorTable(extra_index, gpu_handle_samplers_pixel_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_extras_.textures_vertex; if (extra_index != RootExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { command_list->SetGraphicsRootDescriptorTable(extra_index, gpu_handle_textures_vertex_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_extras_.samplers_vertex; if (extra_index != RootExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { command_list->SetGraphicsRootDescriptorTable(extra_index, gpu_handle_samplers_vertex_); current_graphics_root_up_to_date_ |= 1u << extra_index; } return true; } } // namespace d3d12 } // namespace gpu } // namespace xe