diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 4a3511d54..5b306509f 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -257,19 +257,30 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( range.OffsetInDescriptorsFromTableStart = 0; } - // Shared memory. + // Shared memory and, if ROVs are used, EDRAM. + D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[2]; { - auto& parameter = parameters[kRootParameter_SharedMemory]; - auto& range = ranges[kRootParameter_SharedMemory]; + auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; - parameter.DescriptorTable.pDescriptorRanges = &range; + parameter.DescriptorTable.pDescriptorRanges = + shared_memory_and_edram_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = 1; - range.BaseShaderRegister = 0; - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = 0; + shared_memory_and_edram_ranges[0].RangeType = + D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + shared_memory_and_edram_ranges[0].NumDescriptors = 1; + shared_memory_and_edram_ranges[0].BaseShaderRegister = 0; + shared_memory_and_edram_ranges[0].RegisterSpace = 0; + shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0; + if (render_target_cache_->IsROVUsedForEDRAM()) { + ++parameter.DescriptorTable.NumDescriptorRanges; + shared_memory_and_edram_ranges[1].RangeType = + D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + shared_memory_and_edram_ranges[1].NumDescriptors = 1; + shared_memory_and_edram_ranges[1].BaseShaderRegister = 0; + shared_memory_and_edram_ranges[1].RegisterSpace = 0; + shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1; + } } // Extra parameters. 
@@ -1210,6 +1221,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); } + if (render_target_cache_->IsROVUsedForEDRAM()) { + render_target_cache_->UseEDRAMAsUAV(); + } if (indexed) { uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 ? sizeof(uint32_t) @@ -2080,6 +2094,10 @@ bool D3D12CommandProcessor::UpdateBindings( // All the constants + shared memory + textures. uint32_t view_count_full_update = 6 + texture_count_vertex + texture_count_pixel; + if (render_target_cache_->IsROVUsedForEDRAM()) { + // + EDRAM UAV. + ++view_count_full_update; + } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; uint32_t descriptor_size_view = provider->GetViewDescriptorSize(); @@ -2122,12 +2140,19 @@ bool D3D12CommandProcessor::UpdateBindings( write_textures_pixel = texture_count_pixel != 0; texture_bindings_written_vertex_ = false; texture_bindings_written_pixel_ = false; - // If updating fully, write the shared memory descriptor (t0). + // If updating fully, write the shared memory descriptor (t0) and, if + // needed, the EDRAM descriptor (u0). 
shared_memory_->CreateSRV(view_cpu_handle); - gpu_handle_shared_memory_ = view_gpu_handle; + gpu_handle_shared_memory_and_edram_ = view_gpu_handle; view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; - current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SharedMemory); + if (render_target_cache_->IsROVUsedForEDRAM()) { + render_target_cache_->CreateEDRAMUint32UAV(view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + current_graphics_root_up_to_date_ &= + ~(1u << kRootParameter_SharedMemoryAndEDRAM); } if (draw_sampler_full_update_ != sampler_full_update_index) { write_samplers_vertex = sampler_count_vertex != 0; @@ -2295,10 +2320,12 @@ bool D3D12CommandProcessor::UpdateBindings( current_graphics_root_up_to_date_ |= 1u << kRootParameter_BoolLoopConstants; } if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_SharedMemory))) { - command_list->SetGraphicsRootDescriptorTable(kRootParameter_SharedMemory, - gpu_handle_shared_memory_); - current_graphics_root_up_to_date_ |= 1u << kRootParameter_SharedMemory; + (1u << kRootParameter_SharedMemoryAndEDRAM))) { + command_list->SetGraphicsRootDescriptorTable( + kRootParameter_SharedMemoryAndEDRAM, + gpu_handle_shared_memory_and_edram_); + current_graphics_root_up_to_date_ |= 1u + << kRootParameter_SharedMemoryAndEDRAM; } uint32_t extra_index; extra_index = current_graphics_root_extras_.textures_pixel; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index cf53ddeb6..bf83041a7 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -157,8 +157,9 @@ class D3D12CommandProcessor : public CommandProcessor { // Pretty rarely used and rarely changed - flow control constants. 
kRootParameter_BoolLoopConstants, // Never changed except for when starting a new descriptor heap - shared - // memory byte address buffer (t0). - kRootParameter_SharedMemory, + // memory byte address buffer (t0) and, if ROV is used for EDRAM, EDRAM UAV + // (u0). + kRootParameter_SharedMemoryAndEDRAM, kRootParameter_Count_Base, @@ -333,7 +334,7 @@ class D3D12CommandProcessor : public CommandProcessor { D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_pixel_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_bool_loop_constants_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_vertex_; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 94fd99ef6..b97b23905 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -1108,10 +1108,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, provider->OffsetViewDescriptor(descriptor_cpu_start, 1)); // Transition the buffers. - command_processor_->PushTransitionBarrier( - edram_buffer_, edram_buffer_state_, - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); shared_memory->UseForWriting(); command_processor_->SubmitBarriers(); @@ -1242,10 +1239,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // Load the EDRAM buffer contents to the copy buffer. 
- command_processor_->PushTransitionBarrier( - edram_buffer_, edram_buffer_state_, - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); command_processor_->SubmitBarriers(); command_list->SetComputeRootSignature(edram_load_store_root_signature_); @@ -1511,10 +1505,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, } // Submit the clear. - command_processor_->PushTransitionBarrier( - edram_buffer_, edram_buffer_state_, - D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); command_processor_->SubmitBarriers(); EDRAMLoadStoreRootConstants root_constants; root_constants.clear_rect_lt = (clear_rect.left << samples_x_log2) | @@ -1728,6 +1719,25 @@ void RenderTargetCache::UnbindRenderTargets() { ClearBindings(); } +void RenderTargetCache::UseEDRAMAsUAV() { + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); +} + +void RenderTargetCache::CreateEDRAMUint32UAV( + D3D12_CPU_DESCRIPTOR_HANDLE handle) { + auto device = + command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); + D3D12_UNORDERED_ACCESS_VIEW_DESC desc; + desc.Format = DXGI_FORMAT_R32_UINT; + desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + desc.Buffer.FirstElement = 0; + desc.Buffer.NumElements = kEDRAMBufferSize / sizeof(uint32_t); + desc.Buffer.StructureByteStride = 0; + desc.Buffer.CounterOffsetInBytes = 0; + desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + device->CreateUnorderedAccessView(edram_buffer_, nullptr, &desc, handle); +} + void RenderTargetCache::EndFrame() { UnbindRenderTargets(); } ColorRenderTargetFormat RenderTargetCache::GetBaseColorFormat( @@ -1773,6 +1783,12 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat( return DXGI_FORMAT_UNKNOWN; } +void 
RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) { + command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_, + new_state); + edram_buffer_state_ = new_state; +} + void RenderTargetCache::ClearBindings() { current_surface_pitch_ = 0; current_msaa_samples_ = MsaaSamples::k1X; @@ -2095,10 +2111,7 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() { D3D12_RESOURCE_STATE_COPY_SOURCE); render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE; } - command_processor_->PushTransitionBarrier( - edram_buffer_, edram_buffer_state_, - D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); // Set up the bindings. auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); @@ -2253,10 +2266,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM( D3D12_RESOURCE_STATE_COPY_DEST); render_target->state = D3D12_RESOURCE_STATE_COPY_DEST; } - command_processor_->PushTransitionBarrier( - edram_buffer_, edram_buffer_state_, - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); // Set up the bindings. auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 83cc6649a..7347b68e3 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -264,6 +264,9 @@ class RenderTargetCache { // the command processor takes over framebuffer bindings to draw something // special. void UnbindRenderTargets(); + // Transitions the EDRAM buffer to a UAV - for use with ROV rendering. 
+ void UseEDRAMAsUAV(); + void CreateEDRAMUint32UAV(D3D12_CPU_DESCRIPTOR_HANDLE handle); void EndFrame(); // Totally necessary to rely on the base format - Too Human switches between @@ -392,6 +395,8 @@ class RenderTargetCache { uint32_t copy_buffer_size; }; + void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state); + void ClearBindings(); // Checks if the heap for the render target exists and tries to create it if diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 52da6a7ae..3efa9bb94 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -7502,8 +7502,13 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Bound resource count (samplers, SRV, UAV, CBV). // + 1 for shared memory (vfetches can probably appear in pixel shaders too, // they are handled safely there anyway). - shader_object_.push_back(uint32_t(sampler_bindings_.size()) + 1 + - uint32_t(texture_srvs_.size()) + cbuffer_count_); + uint32_t resource_count = uint32_t(sampler_bindings_.size()) + 1 + + uint32_t(texture_srvs_.size()) + cbuffer_count_; + if (is_pixel_shader() && edram_rov_used_) { + // EDRAM. + ++resource_count; + } + shader_object_.push_back(resource_count); // Bound resource buffer offset (set later). 
shader_object_.push_back(0); if (is_vertex_shader()) { @@ -7797,7 +7802,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { } // *************************************************************************** - // Bindings, in s#, t#, cb# order + // Bindings, in s#, t#, u#, cb# order // *************************************************************************** // Write used resource names, except for constant buffers because we have @@ -7815,6 +7820,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str()); } + uint32_t edram_name_offset = new_offset; + if (is_pixel_shader() && edram_rov_used_) { + new_offset += AppendString(shader_object_, "xe_edram"); + } // Write the offset to the header. shader_object_[chunk_position_dwords + 3] = new_offset; @@ -7900,6 +7909,29 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { texture_name_offset += GetStringLength(texture_srv.name.c_str()); } + if (is_pixel_shader() && edram_rov_used_) { + // EDRAM uint32 buffer. + shader_object_.push_back(edram_name_offset); + // D3D_SIT_UAV_RWTYPED. + shader_object_.push_back(4); + // D3D_RETURN_TYPE_UINT. + shader_object_.push_back(4); + // D3D_UAV_DIMENSION_BUFFER. + shader_object_.push_back(1); + // Not multisampled. + shader_object_.push_back(0xFFFFFFFFu); + // Register u0. + shader_object_.push_back(0); + // One binding. + shader_object_.push_back(1); + // No D3D_SHADER_INPUT_FLAGS. + shader_object_.push_back(0); + // Register space 0. + shader_object_.push_back(0); + // UAV ID U0. + shader_object_.push_back(0); + } + // Constant buffers. 
for (uint32_t i = 0; i < cbuffer_count_; ++i) { uint32_t register_index = 0; @@ -8297,8 +8329,8 @@ void DxbcShaderTranslator::WriteShaderCode() { } shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7) | - ENCODE_D3D10_SB_RESOURCE_DIMENSION(texture_srv_dimension)); + ENCODE_D3D10_SB_RESOURCE_DIMENSION(texture_srv_dimension) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_object_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3)); // T0 is shared memory. @@ -8314,6 +8346,28 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(0); } + // Unordered access views. + if (is_pixel_shader() && edram_rov_used_) { + // EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0). + shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE( + D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) | + ENCODE_D3D10_SB_RESOURCE_DIMENSION(D3D10_SB_RESOURCE_DIMENSION_BUFFER) | + D3D11_SB_RASTERIZER_ORDERED_ACCESS | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3)); + shader_object_.push_back(0); + shader_object_.push_back(0); + shader_object_.push_back(0); + shader_object_.push_back( + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) | + ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3)); + shader_object_.push_back(0); + } + // Inputs and outputs. if (is_vertex_shader()) { // Unswapped vertex index input (only X component). 
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 82130ea3a..083d1331c 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -607,7 +607,7 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t c_barrier_instructions; // Unknown in Wine. uint32_t c_interlocked_instructions; - // Unknown in Wine. + // Unknown in Wine, but confirmed by testing. uint32_t c_texture_store_instructions; }; Statistics stat_;