diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 19b6b9840f..ed22bca224 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -8,19 +8,75 @@ #include "D3D12Formats.h" #include "../rsx_methods.h" - -std::vector D3D12GSRender::upload_vertex_attributes(const std::vector > &vertex_ranges) +namespace { - std::vector vertex_buffer_views; + UINT get_component_mapping_from_vector_size(u8 size) + { + switch (size) + { + case 1: + return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1); + case 2: + return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1); + case 3: + return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1, + D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2, + D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1); + case 4: + return D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + } + throw EXCEPTION("Wrong vector size %d", size); + } - m_ia_set.clear(); - size_t input_slot = 0; + u32 get_vertex_count(const std::vector > first_count_commands) + { + u32 vertex_count = 0; + for (const auto &pair : first_count_commands) + vertex_count += pair.second; + return vertex_count; + } - size_t vertex_count = 0; + D3D12_SHADER_RESOURCE_VIEW_DESC get_vertex_attribute_srv(const rsx::data_array_format_info &info, UINT64 offset_in_vertex_buffers_buffer, UINT buffer_size) + { + u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); + D3D12_SHADER_RESOURCE_VIEW_DESC vertex_buffer_view = 
{ + get_vertex_attribute_format(info.type, info.size), + D3D12_SRV_DIMENSION_BUFFER, + get_component_mapping_from_vector_size(info.size) + }; + vertex_buffer_view.Buffer.FirstElement = offset_in_vertex_buffers_buffer / element_size; + vertex_buffer_view.Buffer.NumElements = buffer_size / element_size; + return vertex_buffer_view; + } - for (const auto &pair : vertex_ranges) - vertex_count += pair.second; + template + UINT64 get_next_multiple_of(UINT64 val) + { + UINT64 divided_val = (val + N - 1) / N; + return divided_val * N; + } +} + +std::vector D3D12GSRender::upload_vertex_attributes( + const std::vector > &vertex_ranges, + gsl::not_null command_list) +{ + std::vector vertex_buffer_views; + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); + + u32 vertex_count = get_vertex_count(vertex_ranges); + size_t offset_in_vertex_buffers_buffer = 0; u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; for (int index = 0; index < rsx::limits::vertex_count; ++index) @@ -35,8 +91,7 @@ std::vector D3D12GSRender::upload_vertex_attributes(co const rsx::data_array_format_info &info = vertex_arrays_info[index]; u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); - - size_t buffer_size = element_size * vertex_count; + UINT buffer_size = element_size * vertex_count; size_t heap_offset = m_buffer_data.alloc(buffer_size); void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); @@ -47,64 +102,104 @@ std::vector D3D12GSRender::upload_vertex_attributes(co } m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = - { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - (UINT)element_size - }; - vertex_buffer_views.push_back(vertex_buffer_view); + 
command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size); + + vertex_buffer_views.emplace_back(get_vertex_attribute_srv(info, offset_in_vertex_buffers_buffer, buffer_size)); + offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 m_timers.buffer_upload_size += buffer_size; - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)input_slot++; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = 0; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - IAElement.InstanceDataStepRate = 0; - m_ia_set.push_back(IAElement); } else if (register_vertex_info[index].size > 0) { // In register vertex attribute const rsx::data_array_format_info &info = register_vertex_info[index]; - const std::vector &data = register_vertex_data[index]; u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); - - size_t buffer_size = data.size(); + UINT buffer_size = gsl::narrow(data.size()); size_t heap_offset = m_buffer_data.alloc(buffer_size); void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); memcpy(mapped_buffer, data.data(), data.size()); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - (UINT)element_size - }; - vertex_buffer_views.push_back(vertex_buffer_view); + command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size); - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = 
(UINT)index; - IAElement.InputSlot = (UINT)input_slot++; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = 0; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - IAElement.InstanceDataStepRate = 1; - m_ia_set.push_back(IAElement); + vertex_buffer_views.emplace_back(get_vertex_attribute_srv(info, offset_in_vertex_buffers_buffer, buffer_size)); + offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 } } - + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER)); return vertex_buffer_views; } +namespace +{ +std::tuple, size_t> upload_inlined_vertex_array( + gsl::span vertex_attribute_infos, + gsl::span inlined_array_raw_data, + data_heap& ring_buffer_data, + ID3D12Resource* vertex_buffer_placement, + ID3D12GraphicsCommandList* command_list + ) +{ + // We can't rely on vertex_attribute_infos strides here so compute it + // assuming all attributes are packed + u32 stride = 0; + for (const auto &info : vertex_attribute_infos) + { + if (!info.size) // disabled + continue; + + stride += rsx::get_vertex_type_size_on_host(info.type, info.size); + } + + u32 element_count = gsl::narrow(inlined_array_raw_data.size_bytes()) / stride; + std::vector result; + + UINT64 vertex_buffer_offset = 0; + for (const auto &info : vertex_attribute_infos) + { + if (!info.size) // disabled + continue; + + u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); + UINT buffer_size = element_size * element_count; + size_t heap_offset = ring_buffer_data.alloc(buffer_size); + + void *mapped_buffer = ring_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + gsl::span dst = { (gsl::byte*)mapped_buffer, buffer_size }; + + for (u32 i = 0; i < element_count; i++) + { + 
auto subdst = dst.subspan(i * element_size, element_size); + auto subsrc = inlined_array_raw_data.subspan(i * stride, element_size); + if (info.type == rsx::vertex_base_type::ub && info.size == 4) + { + subdst[0] = subsrc[3]; + subdst[1] = subsrc[2]; + subdst[2] = subsrc[1]; + subdst[3] = subsrc[0]; + } + else + { + std::copy(subsrc.begin(), subsrc.end(), subdst.begin()); + } + } + + ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + + command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset, ring_buffer_data.get_heap(), heap_offset, buffer_size); + + result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size)); + vertex_buffer_offset = get_next_multiple_of<48>(vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 + } + + return std::make_tuple(result, element_count); +} +} + void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) { size_t heap_offset = m_buffer_data.alloc(256); @@ -187,47 +282,7 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_ } -std::tuple D3D12GSRender::upload_inlined_vertex_array() -{ - UINT offset = 0; - m_ia_set.clear(); - // Bind attributes - for (int index = 0; index < rsx::limits::vertex_count; ++index) - { - const auto &info = vertex_arrays_info[index]; - if (!info.size) // disabled - continue; - - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = 0; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = offset; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - IAElement.InstanceDataStepRate = 0; - m_ia_set.push_back(IAElement); - - offset += rsx::get_vertex_type_size_on_host(info.type, info.size); - } - - // Copy inline buffer - size_t buffer_size = inline_vertex_array.size() * sizeof(int); - size_t heap_offset = 
m_buffer_data.alloc(buffer_size); - void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - write_inline_array_to_buffer(mapped_buffer); - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = - { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - (UINT)offset - }; - - return std::make_tuple(vertex_buffer_view, (u32)buffer_size / offset); -} std::tuple D3D12GSRender::generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges) { @@ -258,53 +313,46 @@ std::tuple D3D12GSRender::generate_index_buffer return std::make_tuple(index_buffer_view, index_count); } -std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) +std::tuple> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) { if (draw_command == rsx::draw_command::inlined_array) { size_t vertex_count; - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; - std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array(); - command_list->IASetVertexBuffers(0, (UINT)1, &vertex_buffer_view); + std::vector vertex_buffer_view; + std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array( + vertex_arrays_info, + { (const gsl::byte*) inline_vertex_array.data(), gsl::narrow(inline_vertex_array.size() * sizeof(uint)) }, + m_buffer_data, m_vertex_buffer_data.Get(), command_list); if (is_primitive_native(draw_mode)) - return std::make_tuple(false, vertex_count); + return std::make_tuple(false, vertex_count, vertex_buffer_view); D3D12_INDEX_BUFFER_VIEW index_buffer_view; size_t index_count; std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } }); command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count); + return std::make_tuple(true, index_count, 
vertex_buffer_view); } if (draw_command == rsx::draw_command::array) { - const std::vector &vertex_buffer_views = upload_vertex_attributes(first_count_commands); - command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); - if (is_primitive_native(draw_mode)) { - // Index count - size_t vertex_count = 0; - for (const auto &pair : first_count_commands) - vertex_count += pair.second; - return std::make_tuple(false, vertex_count); + size_t vertex_count = get_vertex_count(first_count_commands); + return std::make_tuple(false, vertex_count, upload_vertex_attributes(first_count_commands, command_list)); } D3D12_INDEX_BUFFER_VIEW index_buffer_view; size_t index_count; std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array(first_count_commands); command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count); + return std::make_tuple(true, index_count, upload_vertex_attributes(first_count_commands, command_list)); } assert(draw_command == rsx::draw_command::indexed); // Index count - size_t index_count = 0; - for (const auto &pair : first_count_commands) - index_count += pair.second; - index_count = get_index_count(draw_mode, gsl::narrow(index_count)); + size_t index_count = get_index_count(draw_mode, gsl::narrow(get_vertex_count(first_count_commands))); rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); size_t index_size = get_index_type_size(indexed_type); @@ -337,10 +385,7 @@ std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G m_timers.buffer_upload_size += buffer_size; command_list->IASetIndexBuffer(&index_buffer_view); - const std::vector &vertex_buffer_views = upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); - command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); - - return std::make_tuple(true, index_count); + 
return std::make_tuple(true, index_count, upload_vertex_attributes({ std::make_pair(0, max_index + 1) }, command_list)); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index d6f55d2fd6..cf63b6f4b8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -123,7 +123,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) for (ParamItem PI : PT.items) { size_t textureIndex = atoi(PI.name.data() + 3); - OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "Texture2D " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl; OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; } } @@ -141,7 +141,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) for (const ParamItem &PI : PT.items) { size_t textureIndex = atoi(PI.name.data() + 3); - OS << "TextureCube " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "TextureCube " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl; OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 6724b57c48..2121728680 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -135,33 +135,43 @@ D3D12GSRender::D3D12GSRender() m_device->CreateRenderTargetView(m_backbuffer[1].Get(), &renter_target_view_desc, m_backbuffer_descriptor_heap[1]->GetCPUDescriptorHandleForHeapStart()); // Common root signatures - for (unsigned texture_count = 0; texture_count < 17; texture_count++) + for (int vertex_buffer_count = 0; vertex_buffer_count < 17; vertex_buffer_count++) // Some app (naruto ultimate ninja storm 2) uses a shader without 
inputs... { - CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = + for (unsigned texture_count = 0; texture_count < 17; texture_count++) { - // Scale Offset data - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0), - // Constants - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1), - // Textures - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 0), - // Samplers - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0), - }; - CD3DX12_ROOT_PARAMETER RP[2]; - RP[0].InitAsDescriptorTable((texture_count > 0) ? 3 : 2, &descriptorRange[0]); - RP[1].InitAsDescriptorTable(1, &descriptorRange[3]); + CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = + { + // Vertex buffer + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_buffer_count, 0), + // Scale Offset data + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0), + // Constants + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1), + // Textures + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 16), + // Samplers + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0), + }; + CD3DX12_ROOT_PARAMETER RP[2]; + UINT cbv_srv_uav_descriptor_size = 4; + if (texture_count == 0) + cbv_srv_uav_descriptor_size -= 1; + if (vertex_buffer_count == 0) + cbv_srv_uav_descriptor_size -= 1; + RP[0].InitAsDescriptorTable(cbv_srv_uav_descriptor_size, (vertex_buffer_count > 0) ? &descriptorRange[0] : &descriptorRange[1]); + RP[1].InitAsDescriptorTable(1, &descriptorRange[4]); - Microsoft::WRL::ComPtr rootSignatureBlob; - Microsoft::WRL::ComPtr errorBlob; - CHECK_HRESULT(wrapD3D12SerializeRootSignature( - &CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 
2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), - D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); + Microsoft::WRL::ComPtr rootSignatureBlob; + Microsoft::WRL::ComPtr errorBlob; + CHECK_HRESULT(wrapD3D12SerializeRootSignature( + &CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0), + D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); - m_device->CreateRootSignature(0, - rootSignatureBlob->GetBufferPointer(), - rootSignatureBlob->GetBufferSize(), - IID_PPV_ARGS(m_root_signatures[texture_count].GetAddressOf())); + m_device->CreateRootSignature(0, + rootSignatureBlob->GetBufferPointer(), + rootSignatureBlob->GetBufferSize(), + IID_PPV_ARGS(m_root_signatures[texture_count][vertex_buffer_count].GetAddressOf())); + } } m_per_frame_storage[0].init(m_device.Get()); @@ -186,6 +196,17 @@ D3D12GSRender::D3D12GSRender() m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST); m_buffer_data.init(m_device.Get(), 1024 * 1024 * 896, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ); + CHECK_HRESULT( + m_device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(1024 * 1024 * 16), + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, + nullptr, + IID_PPV_ARGS(m_vertex_buffer_data.GetAddressOf()) + ) + ); + if (rpcs3::config.rsx.d3d12.overlay.value()) init_d2d_structures(); } @@ -241,9 +262,14 @@ void D3D12GSRender::end() std::chrono::time_point vertex_index_duration_start = std::chrono::system_clock::now(); + size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index; + size_t vertex_count; bool indexed_draw; - std::tie(indexed_draw, vertex_count) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); + std::vector vertex_buffer_views; + std::tie(indexed_draw, vertex_count, vertex_buffer_views) = 
upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); + + size_t vertex_buffer_count = vertex_buffer_views.size(); std::chrono::time_point vertex_index_duration_end = std::chrono::system_clock::now(); m_timers.vertex_index_duration += std::chrono::duration_cast(vertex_index_duration_end - vertex_index_duration_start).count(); @@ -253,16 +279,23 @@ void D3D12GSRender::end() std::chrono::time_point program_load_end = std::chrono::system_clock::now(); m_timers.program_load_duration += std::chrono::duration_cast(program_load_end - program_load_start).count(); - get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)].Get()); + get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)][vertex_buffer_count].Get()); get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]); std::chrono::time_point constants_duration_start = std::chrono::system_clock::now(); - size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index; + INT offset = 0; + for (const auto view : vertex_buffer_views) + { + m_device->CreateShaderResourceView(m_vertex_buffer_data.Get(), &view, + CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)currentDescriptorIndex + offset++, m_descriptor_stride_srv_cbv_uav)); + } + // Constants - upload_and_bind_scale_offset_matrix(currentDescriptorIndex); - upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1); - upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2); + upload_and_bind_scale_offset_matrix(currentDescriptorIndex + vertex_buffer_count); + upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count); + upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2 + vertex_buffer_count); 
std::chrono::time_point constants_duration_end = std::chrono::system_clock::now(); m_timers.constants_duration += std::chrono::duration_cast(constants_duration_end - constants_duration_start).count(); @@ -272,8 +305,7 @@ void D3D12GSRender::end() std::chrono::time_point texture_duration_start = std::chrono::system_clock::now(); if (std::get<2>(m_current_pso) > 0) { - upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3, std::get<2>(m_current_pso) > 0); - + upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3 + vertex_buffer_count, std::get<2>(m_current_pso) > 0); get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) @@ -285,16 +317,15 @@ void D3D12GSRender::end() ); get_current_resource_storage().current_sampler_index += std::get<2>(m_current_pso); - get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3; + get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3 + vertex_buffer_count; } else { - get_current_resource_storage().command_list->SetDescriptorHeaps(1, get_current_resource_storage().descriptors_heap.GetAddressOf()); get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) .Offset((INT)currentDescriptorIndex, m_descriptor_stride_srv_cbv_uav) ); - get_current_resource_storage().descriptors_heap_index += 3; + get_current_resource_storage().descriptors_heap_index += 3 + vertex_buffer_count; } std::chrono::time_point texture_duration_end = std::chrono::system_clock::now(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 2edd2d9aa9..1a4f6d4a40 100644 --- 
a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -56,7 +56,7 @@ private: ComPtr m_backbuffer[2]; ComPtr m_backbuffer_descriptor_heap[2]; // m_rootSignatures[N] is RS with N texture/sample - ComPtr m_root_signatures[17]; + ComPtr m_root_signatures[17][17]; // indexed by [texture count][vertex count] // TODO: Use a tree structure to parse more efficiently data_cache m_texture_cache; @@ -67,7 +67,7 @@ private: RSXVertexProgram m_vertex_program; RSXFragmentProgram m_fragment_program; PipelineStateObjectCache m_pso_cache; - std::tuple, std::vector, size_t> m_current_pso; + std::tuple, size_t, size_t> m_current_pso; struct { @@ -115,11 +115,10 @@ private: // Textures, constants, index and vertex buffers storage data_heap m_buffer_data; data_heap m_readback_resources; + ComPtr m_vertex_buffer_data; rsx::render_targets m_rtts; - std::vector m_ia_set; - INT m_descriptor_stride_srv_cbv_uav; INT m_descriptor_stride_dsv; INT m_descriptor_stride_rtv; @@ -145,16 +144,15 @@ private: * Non native primitive type are emulated by index buffers expansion. * Returns whether the draw call is indexed or not and the vertex count to draw. */ - std::tuple upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); + std::tuple > upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); /** * Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges. * A range in vertex_range is a pair whose first element is the index of the beginning of the * range, and whose second element is the number of vertex in this range. 
*/ - std::vector upload_vertex_attributes(const std::vector > &vertex_ranges); - - std::tuple upload_inlined_vertex_array(); + std::vector upload_vertex_attributes(const std::vector > &vertex_ranges, + gsl::not_null command_list); std::tuple generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp index 0e1e3c5be7..02ce53395a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp @@ -106,7 +106,7 @@ void resource_storage::init(ID3D12Device *device) CHECK_HRESULT(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator.Get(), nullptr, IID_PPV_ARGS(command_list.GetAddressOf()))); CHECK_HRESULT(command_list->Close()); - D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; + D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 50000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; CHECK_HRESULT(device->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(&descriptors_heap))); D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 291b578867..27d6c716ee 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -241,7 +241,6 @@ void D3D12GSRender::load_program() for (unsigned i = 0; i < prop.numMRT; i++) prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask; - prop.IASet = m_ia_set; if (!!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) { rsx::index_array_type index_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); diff --git 
a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index c2c771e45a..0023f3b8a0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -10,7 +10,6 @@ struct D3D12PipelineProperties D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; DXGI_FORMAT DepthStencilFormat; DXGI_FORMAT RenderTargetsFormat; - std::vector IASet; D3D12_BLEND_DESC Blend; unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; @@ -19,23 +18,6 @@ struct D3D12PipelineProperties bool operator==(const D3D12PipelineProperties &in) const { - if (IASet.size() != in.IASet.size()) - return false; - for (unsigned i = 0; i < IASet.size(); i++) - { - const D3D12_INPUT_ELEMENT_DESC &a = IASet[i], &b = in.IASet[i]; - if (a.AlignedByteOffset != b.AlignedByteOffset) - return false; - if (a.Format != b.Format) - return false; - if (a.InputSlot != b.InputSlot) - return false; - if (a.InstanceDataStepRate != b.InstanceDataStepRate) - return false; - if (a.SemanticIndex != b.SemanticIndex) - return false; - } - if (memcmp(&DepthStencil, &in.DepthStencil, sizeof(D3D12_DEPTH_STENCIL_DESC))) return false; if (memcmp(&Blend, &in.Blend, sizeof(D3D12_BLEND_DESC))) @@ -93,7 +75,7 @@ public: ComPtr bytecode; // For debugging std::string content; - std::vector vertex_shader_inputs; + size_t vertex_shader_input_count; std::vector FragmentConstantOffsetCache; size_t m_textureCount; @@ -118,29 +100,11 @@ bool has_attribute(size_t attribute, const std::vector return false; } -static -std::vector completes_IA_desc(const std::vector &desc, const std::vector &inputs) -{ - std::vector result(desc); - for (size_t attribute : inputs) - { - if (has_attribute(attribute, desc)) - continue; - D3D12_INPUT_ELEMENT_DESC extra_ia_desc = {}; - extra_ia_desc.SemanticIndex = (UINT)attribute; - extra_ia_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; - extra_ia_desc.SemanticName = "TEXCOORD"; - extra_ia_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - 
result.push_back(extra_ia_desc); - } - return result; -} - struct D3D12Traits { using vertex_program_type = Shader; using fragment_program_type = Shader; - using pipeline_storage_type = std::tuple, std::vector, size_t>; + using pipeline_storage_type = std::tuple, size_t, size_t>; using pipeline_properties = D3D12PipelineProperties; static @@ -176,15 +140,15 @@ struct D3D12Traits D3D12VertexProgramDecompiler VS(RSXVP); std::string shaderCode = VS.Decompile(); vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); - vertexProgramData.vertex_shader_inputs = VS.input_slots; + vertexProgramData.vertex_shader_input_count = RSXVP.rsx_vertex_inputs.size(); fs::file(fs::get_config_dir() + "VertexProgram" + std::to_string(ID) + ".hlsl", fom::rewrite).write(shaderCode); vertexProgramData.id = (u32)ID; } static - pipeline_storage_type build_pipeline( - const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties, - ID3D12Device *device, gsl::span, 17> root_signatures) + pipeline_storage_type build_pipeline( + const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties, + ID3D12Device *device, gsl::span, 17, 17> root_signatures) { std::tuple, size_t> result = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; @@ -199,7 +163,7 @@ struct D3D12Traits graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); - graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount].Get(); + graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount][vertexProgramData.vertex_shader_input_count].Get(); graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; 
graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; @@ -211,10 +175,6 @@ struct D3D12Traits graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat; graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; - const std::vector &completed_IA_desc = completes_IA_desc(pipelineProperties.IASet, vertexProgramData.vertex_shader_inputs); - - graphicPipelineStateDesc.InputLayout.pInputElementDescs = completed_IA_desc.data(); - graphicPipelineStateDesc.InputLayout.NumElements = (UINT)completed_IA_desc.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; @@ -226,7 +186,7 @@ struct D3D12Traits std::wstring name = L"PSO_" + std::to_wstring(vertexProgramData.id) + L"_" + std::to_wstring(fragmentProgramData.id); pso->SetName(name.c_str()); - return std::make_tuple(pso, vertexProgramData.vertex_shader_inputs, fragmentProgramData.m_textureCount); + return std::make_tuple(pso, vertexProgramData.vertex_shader_input_count, fragmentProgramData.m_textureCount); } }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 7d663411d6..a56b5f4212 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -52,20 +52,40 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) OS << "};" << std::endl; } +namespace +{ + bool declare_input(std::stringstream & OS, const std::tuple &attribute, const std::vector &inputs, size_t reg) + { + for (const auto &real_input : inputs) + { + if (static_cast(real_input.location) != std::get<0>(attribute)) + continue; + OS << "Texture1D " << std::get<1>(attribute) << "_buffer : register(t" << reg++ << ");\n"; + return true; + } + return false; + } +} + void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) 
{ - OS << "struct VertexInput" << std::endl; - OS << "{" << std::endl; + /* collects one (PI.location, PI.name) tuple per input item */ std::vector> input_data; for (const ParamType PT : inputs) { for (const ParamItem &PI : PT.items) { - OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl; - input_slots.push_back(PI.location); + input_data.push_back(std::make_tuple(PI.location, PI.name)); } } - OS << "};" << std::endl; + /* sort the tuples so declarations are emitted in ascending tuple order */ std::sort(input_data.begin(), input_data.end()); + + /* t register index; advanced only when declare_input reports that it wrote a declaration */ size_t t_register = 0; + for (const auto &attribute : input_data) + { + if (declare_input(OS, attribute, rsx_vertex_program.rsx_vertex_inputs, t_register)) + t_register++; + } } void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector & constants) @@ -140,9 +160,39 @@ static const reg_info reg_table[] = { "tc8", true, "dst_reg15", "", false }, }; +namespace +{ + /* Writes the statement that defines the float4 local named PI.name. For the first entry of inputs whose location equals PI.location it emits a Load from the buffer named PI.name followed by _buffer: index 0 when the entry is not an array, vertex_id modulo frequency or vertex_id divided by frequency when frequency is greater than 1 (chosen by is_modulo), and plain vertex_id otherwise. When no entry matches it emits the constant float4(0., 0., 0., 1.). */ void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) + { + for (const auto &real_input : inputs) + { + if (real_input.location != PI.location) + continue; + if (!real_input.is_array) + { + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(0);\n"; + return; + } + if (real_input.frequency > 1) + { + if (real_input.is_modulo) + { + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id % " << real_input.frequency << ");\n"; + return; + } + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id / " << real_input.frequency << ");\n"; + return; + } + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n"; + return; + } + /* fallback when no entry of inputs has this location */ OS << " float4 " << PI.name << " = float4(0., 0., 0., 1.);\n"; + } +} + void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { - OS << "PixelInput main(VertexInput In)" << std::endl; + /* the generated main now takes only the SV_VertexID value as its parameter */ OS << "PixelInput main(uint vertex_id : SV_VertexID)" << std::endl; OS << "{" << std::endl; // Declare inside main function @@ -162,7 +212,9 @@ void 
D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; + { + /* each PF_PARAM_IN item is now initialised by add_input instead of being copied from the In struct */ add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); + } } } @@ -182,7 +234,7 @@ void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS) } D3D12VertexProgramDecompiler::D3D12VertexProgramDecompiler(const RSXVertexProgram &prog) : - VertexProgramDecompiler(prog) + /* also binds the rsx_vertex_program reference member to prog; NOTE(review): only a reference is stored, so prog has to outlive this object - confirm against callers */ VertexProgramDecompiler(prog), rsx_vertex_program(prog) { } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index fd5a55a7a8..01161b37c8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -18,7 +18,8 @@ protected: virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs); virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); + + /* non-owning reference to the program handed to the constructor */ const RSXVertexProgram &rsx_vertex_program; public: - std::vector input_slots; D3D12VertexProgramDecompiler(const RSXVertexProgram &prog); };