d3d12: Store vertex attributes as SRV and disable Input_layout.

This commit is contained in:
Vincent Lejeune 2016-01-18 16:50:06 +01:00
parent ffb014ca3d
commit 6384541345
9 changed files with 297 additions and 211 deletions

View File

@ -8,19 +8,75 @@
#include "D3D12Formats.h"
#include "../rsx_methods.h"
std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges)
namespace
{
std::vector<D3D12_VERTEX_BUFFER_VIEW> vertex_buffer_views;
/**
 * Returns the shader component mapping for a vertex attribute holding `size`
 * components per element.
 * The first `size` components are taken from memory; a missing second or third
 * component is forced to 0 and a missing fourth component is forced to 1.
 * Throws if `size` is not in the 1..4 range.
 */
UINT get_component_mapping_from_vector_size(u8 size)
{
	if (size < 1 || size > 4)
		throw EXCEPTION("Wrong vector size %d", size);

	// A complete 4-component attribute uses the default (identity) mapping.
	if (size == 4)
		return D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;

	const auto second_component = (size >= 2)
		? D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1
		: D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0;
	const auto third_component = (size >= 3)
		? D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2
		: D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0;

	return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
		D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0,
		second_component,
		third_component,
		D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1);
}
m_ia_set.clear();
size_t input_slot = 0;
size_t vertex_count = 0;
for (const auto &pair : vertex_ranges)
/**
 * Sums the vertex counts of a list of (first, count) draw ranges.
 * @param first_count_commands ranges whose second member is the number of vertices in the range.
 * @return the total number of vertices over all ranges (0 for an empty list).
 */
u32 get_vertex_count(const std::vector<std::pair<u32, u32> > &first_count_commands)
{
	// Taken by reference: the list is only read, so there is no reason to copy it on every call.
	u32 vertex_count = 0;
	for (const auto &pair : first_count_commands)
		vertex_count += pair.second;
	return vertex_count;
}
/**
 * Describes a buffer SRV over one vertex attribute.
 * @param info format (type and component count) of the attribute.
 * @param offset_in_vertex_buffers_buffer byte offset of the attribute data; converted to an element index.
 * @param buffer_size size in bytes of the attribute data; converted to an element count.
 * @return a view description whose format and component mapping are derived from `info`.
 */
D3D12_SHADER_RESOURCE_VIEW_DESC get_vertex_attribute_srv(const rsx::data_array_format_info &info, UINT64 offset_in_vertex_buffers_buffer, UINT buffer_size)
{
	const u32 bytes_per_element = rsx::get_vertex_type_size_on_host(info.type, info.size);

	D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
	srv_desc.Format = get_vertex_attribute_format(info.type, info.size);
	srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
	srv_desc.Shader4ComponentMapping = get_component_mapping_from_vector_size(info.size);

	// Both the start and the length of the view are expressed in elements, not in bytes.
	srv_desc.Buffer.FirstElement = offset_in_vertex_buffers_buffer / bytes_per_element;
	srv_desc.Buffer.NumElements = buffer_size / bytes_per_element;
	return srv_desc;
}
/**
 * Rounds `val` up to the nearest multiple of N.
 * A value that is already a multiple of N is returned unchanged.
 */
template<int N>
UINT64 get_next_multiple_of(UINT64 val)
{
	const UINT64 bumped = val + N - 1;
	// Dropping the remainder of the bumped value lands on the multiple at or above val.
	return bumped - bumped % N;
}
}
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> D3D12GSRender::upload_vertex_attributes(
const std::vector<std::pair<u32, u32> > &vertex_ranges,
gsl::not_null<ID3D12GraphicsCommandList*> command_list)
{
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_views;
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
u32 vertex_count = get_vertex_count(vertex_ranges);
size_t offset_in_vertex_buffers_buffer = 0;
u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK];
for (int index = 0; index < rsx::limits::vertex_count; ++index)
@ -35,8 +91,7 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
const rsx::data_array_format_info &info = vertex_arrays_info[index];
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = element_size * vertex_count;
UINT buffer_size = element_size * vertex_count;
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
@ -47,64 +102,104 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
}
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
vertex_buffer_views.push_back(vertex_buffer_view);
command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(info, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
m_timers.buffer_upload_size += buffer_size;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_ia_set.push_back(IAElement);
}
else if (register_vertex_info[index].size > 0)
{
// In register vertex attribute
const rsx::data_array_format_info &info = register_vertex_info[index];
const std::vector<u8> &data = register_vertex_data[index];
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = data.size();
UINT buffer_size = gsl::narrow<UINT>(data.size());
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
memcpy(mapped_buffer, data.data(), data.size());
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)element_size
};
vertex_buffer_views.push_back(vertex_buffer_view);
command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size);
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = (UINT)input_slot++;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = 0;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
IAElement.InstanceDataStepRate = 1;
m_ia_set.push_back(IAElement);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(info, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
}
}
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER));
return vertex_buffer_views;
}
namespace
{
	/**
	 * Splits an inlined (interleaved) vertex array into one tightly packed stream per
	 * enabled attribute, copies each stream into vertex_buffer_placement and returns
	 * one SRV description per stream together with the number of vertices.
	 *
	 * @param vertex_attribute_infos format of every attribute slot; slots with size 0 are skipped.
	 * @param inlined_array_raw_data raw interleaved vertex bytes.
	 * @param ring_buffer_data heap used as staging memory for the copies.
	 * @param vertex_buffer_placement destination resource of the copies.
	 * @param command_list command list the copies are recorded on.
	 * @return (SRV descriptions in attribute order, vertex count); empty and 0 when no attribute is enabled.
	 */
	std::tuple<std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>, size_t> upload_inlined_vertex_array(
		gsl::span<const rsx::data_array_format_info, 16> vertex_attribute_infos,
		gsl::span<const gsl::byte> inlined_array_raw_data,
		data_heap& ring_buffer_data,
		ID3D12Resource* vertex_buffer_placement,
		ID3D12GraphicsCommandList* command_list
		)
	{
		// We can't rely on vertex_attribute_infos strides here so compute it
		// assuming all attributes are packed
		u32 stride = 0;
		for (const auto &info : vertex_attribute_infos)
		{
			if (!info.size) // disabled
				continue;

			stride += rsx::get_vertex_type_size_on_host(info.type, info.size);
		}

		std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> result;

		// No enabled attribute: there is nothing to upload and the stride cannot be divided by.
		if (stride == 0)
			return std::make_tuple(result, size_t{ 0 });

		u32 element_count = gsl::narrow<u32>(inlined_array_raw_data.size_bytes()) / stride;

		UINT64 vertex_buffer_offset = 0;
		// Byte offset of the current attribute inside one packed source vertex.
		u32 attribute_offset = 0;
		for (const auto &info : vertex_attribute_infos)
		{
			if (!info.size) // disabled
				continue;

			u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
			UINT buffer_size = element_size * element_count;
			size_t heap_offset = ring_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);

			void *mapped_buffer = ring_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
			gsl::span<gsl::byte> dst = { (gsl::byte*)mapped_buffer, buffer_size };

			for (u32 i = 0; i < element_count; i++)
			{
				auto subdst = dst.subspan(i * element_size, element_size);
				// Each attribute lives at its own offset within the vertex; without it every
				// stream would be filled with the bytes of the first attribute.
				auto subsrc = inlined_array_raw_data.subspan(i * stride + attribute_offset, element_size);
				if (info.type == rsx::vertex_base_type::ub && info.size == 4)
				{
					// 4 x ub attributes are stored with their bytes in reverse order.
					subdst[0] = subsrc[3];
					subdst[1] = subsrc[2];
					subdst[2] = subsrc[1];
					subdst[3] = subsrc[0];
				}
				else
				{
					std::copy(subsrc.begin(), subsrc.end(), subdst.begin());
				}
			}

			ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));

			command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset, ring_buffer_data.get_heap(), heap_offset, buffer_size);

			result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size));
			vertex_buffer_offset = get_next_multiple_of<48>(vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
			attribute_offset += element_size;
		}

		return std::make_tuple(result, element_count);
	}
}
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(256);
@ -187,47 +282,7 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_
}
std::tuple<D3D12_VERTEX_BUFFER_VIEW, size_t> D3D12GSRender::upload_inlined_vertex_array()
{
UINT offset = 0;
m_ia_set.clear();
// Bind attributes
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
const auto &info = vertex_arrays_info[index];
if (!info.size) // disabled
continue;
D3D12_INPUT_ELEMENT_DESC IAElement = {};
IAElement.SemanticName = "TEXCOORD";
IAElement.SemanticIndex = (UINT)index;
IAElement.InputSlot = 0;
IAElement.Format = get_vertex_attribute_format(info.type, info.size);
IAElement.AlignedByteOffset = offset;
IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
IAElement.InstanceDataStepRate = 0;
m_ia_set.push_back(IAElement);
offset += rsx::get_vertex_type_size_on_host(info.type, info.size);
}
// Copy inline buffer
size_t buffer_size = inline_vertex_array.size() * sizeof(int);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
write_inline_array_to_buffer(mapped_buffer);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
(UINT)offset
};
return std::make_tuple(vertex_buffer_view, (u32)buffer_size / offset);
}
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer_for_emulated_primitives_array(const std::vector<std::pair<u32, u32> > &vertex_ranges)
{
@ -258,53 +313,46 @@ std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer
return std::make_tuple(index_buffer_view, index_count);
}
std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list)
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list)
{
if (draw_command == rsx::draw_command::inlined_array)
{
size_t vertex_count;
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view;
std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array();
command_list->IASetVertexBuffers(0, (UINT)1, &vertex_buffer_view);
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_view;
std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array(
vertex_arrays_info,
{ (const gsl::byte*) inline_vertex_array.data(), gsl::narrow<int>(inline_vertex_array.size() * sizeof(uint)) },
m_buffer_data, m_vertex_buffer_data.Get(), command_list);
if (is_primitive_native(draw_mode))
return std::make_tuple(false, vertex_count);
return std::make_tuple(false, vertex_count, vertex_buffer_view);
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
size_t index_count;
std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } });
command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count);
return std::make_tuple(true, index_count, vertex_buffer_view);
}
if (draw_command == rsx::draw_command::array)
{
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertex_buffer_views = upload_vertex_attributes(first_count_commands);
command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data());
if (is_primitive_native(draw_mode))
{
// Index count
size_t vertex_count = 0;
for (const auto &pair : first_count_commands)
vertex_count += pair.second;
return std::make_tuple(false, vertex_count);
size_t vertex_count = get_vertex_count(first_count_commands);
return std::make_tuple(false, vertex_count, upload_vertex_attributes(first_count_commands, command_list));
}
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
size_t index_count;
std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array(first_count_commands);
command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count);
return std::make_tuple(true, index_count, upload_vertex_attributes(first_count_commands, command_list));
}
assert(draw_command == rsx::draw_command::indexed);
// Index count
size_t index_count = 0;
for (const auto &pair : first_count_commands)
index_count += pair.second;
index_count = get_index_count(draw_mode, gsl::narrow<int>(index_count));
size_t index_count = get_index_count(draw_mode, gsl::narrow<int>(get_vertex_count(first_count_commands)));
rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4);
size_t index_size = get_index_type_size(indexed_type);
@ -337,10 +385,7 @@ std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G
m_timers.buffer_upload_size += buffer_size;
command_list->IASetIndexBuffer(&index_buffer_view);
const std::vector<D3D12_VERTEX_BUFFER_VIEW> &vertex_buffer_views = upload_vertex_attributes({ std::make_pair(0, max_index + 1) });
command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data());
return std::make_tuple(true, index_count);
return std::make_tuple(true, index_count, upload_vertex_attributes({ std::make_pair(0, max_index + 1) }, command_list));
}
#endif

View File

@ -123,7 +123,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS)
for (ParamItem PI : PT.items)
{
size_t textureIndex = atoi(PI.name.data() + 3);
OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl;
OS << "Texture2D " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl;
OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl;
}
}
@ -141,7 +141,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS)
for (const ParamItem &PI : PT.items)
{
size_t textureIndex = atoi(PI.name.data() + 3);
OS << "TextureCube " << PI.name << " : register(t" << textureIndex << ");" << std::endl;
OS << "TextureCube " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl;
OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl;
}
}

View File

@ -135,33 +135,43 @@ D3D12GSRender::D3D12GSRender()
m_device->CreateRenderTargetView(m_backbuffer[1].Get(), &renter_target_view_desc, m_backbuffer_descriptor_heap[1]->GetCPUDescriptorHandleForHeapStart());
// Common root signatures
for (int vertex_buffer_count = 0; vertex_buffer_count < 17; vertex_buffer_count++) // Some app (naruto ultimate ninja storm 2) uses a shader without inputs...
{
for (unsigned texture_count = 0; texture_count < 17; texture_count++)
{
CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
{
// Vertex buffer
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_buffer_count, 0),
// Scale Offset data
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0),
// Constants
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1),
// Textures
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 0),
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 16),
// Samplers
CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0),
};
CD3DX12_ROOT_PARAMETER RP[2];
RP[0].InitAsDescriptorTable((texture_count > 0) ? 3 : 2, &descriptorRange[0]);
RP[1].InitAsDescriptorTable(1, &descriptorRange[3]);
UINT cbv_srv_uav_descriptor_size = 4;
if (texture_count == 0)
cbv_srv_uav_descriptor_size -= 1;
if (vertex_buffer_count == 0)
cbv_srv_uav_descriptor_size -= 1;
RP[0].InitAsDescriptorTable(cbv_srv_uav_descriptor_size, (vertex_buffer_count > 0) ? &descriptorRange[0] : &descriptorRange[1]);
RP[1].InitAsDescriptorTable(1, &descriptorRange[4]);
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
CHECK_HRESULT(wrapD3D12SerializeRootSignature(
&CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
&CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0),
D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
m_device->CreateRootSignature(0,
rootSignatureBlob->GetBufferPointer(),
rootSignatureBlob->GetBufferSize(),
IID_PPV_ARGS(m_root_signatures[texture_count].GetAddressOf()));
IID_PPV_ARGS(m_root_signatures[texture_count][vertex_buffer_count].GetAddressOf()));
}
}
m_per_frame_storage[0].init(m_device.Get());
@ -186,6 +196,17 @@ D3D12GSRender::D3D12GSRender()
m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST);
m_buffer_data.init(m_device.Get(), 1024 * 1024 * 896, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
CHECK_HRESULT(
m_device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(1024 * 1024 * 16),
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER,
nullptr,
IID_PPV_ARGS(m_vertex_buffer_data.GetAddressOf())
)
);
if (rpcs3::config.rsx.d3d12.overlay.value())
init_d2d_structures();
}
@ -241,9 +262,14 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> vertex_index_duration_start = std::chrono::system_clock::now();
size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index;
size_t vertex_count;
bool indexed_draw;
std::tie(indexed_draw, vertex_count) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get());
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_views;
std::tie(indexed_draw, vertex_count, vertex_buffer_views) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get());
size_t vertex_buffer_count = vertex_buffer_views.size();
std::chrono::time_point<std::chrono::system_clock> vertex_index_duration_end = std::chrono::system_clock::now();
m_timers.vertex_index_duration += std::chrono::duration_cast<std::chrono::microseconds>(vertex_index_duration_end - vertex_index_duration_start).count();
@ -253,16 +279,23 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> program_load_end = std::chrono::system_clock::now();
m_timers.program_load_duration += std::chrono::duration_cast<std::chrono::microseconds>(program_load_end - program_load_start).count();
get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)].Get());
get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)][vertex_buffer_count].Get());
get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]);
std::chrono::time_point<std::chrono::system_clock> constants_duration_start = std::chrono::system_clock::now();
size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index;
INT offset = 0;
for (const auto view : vertex_buffer_views)
{
m_device->CreateShaderResourceView(m_vertex_buffer_data.Get(), &view,
CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex + offset++, m_descriptor_stride_srv_cbv_uav));
}
// Constants
upload_and_bind_scale_offset_matrix(currentDescriptorIndex);
upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1);
upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2);
upload_and_bind_scale_offset_matrix(currentDescriptorIndex + vertex_buffer_count);
upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count);
upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2 + vertex_buffer_count);
std::chrono::time_point<std::chrono::system_clock> constants_duration_end = std::chrono::system_clock::now();
m_timers.constants_duration += std::chrono::duration_cast<std::chrono::microseconds>(constants_duration_end - constants_duration_start).count();
@ -272,8 +305,7 @@ void D3D12GSRender::end()
std::chrono::time_point<std::chrono::system_clock> texture_duration_start = std::chrono::system_clock::now();
if (std::get<2>(m_current_pso) > 0)
{
upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3, std::get<2>(m_current_pso) > 0);
upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3 + vertex_buffer_count, std::get<2>(m_current_pso) > 0);
get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart())
@ -285,16 +317,15 @@ void D3D12GSRender::end()
);
get_current_resource_storage().current_sampler_index += std::get<2>(m_current_pso);
get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3;
get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3 + vertex_buffer_count;
}
else
{
get_current_resource_storage().command_list->SetDescriptorHeaps(1, get_current_resource_storage().descriptors_heap.GetAddressOf());
get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0,
CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart())
.Offset((INT)currentDescriptorIndex, m_descriptor_stride_srv_cbv_uav)
);
get_current_resource_storage().descriptors_heap_index += 3;
get_current_resource_storage().descriptors_heap_index += 3 + vertex_buffer_count;
}
std::chrono::time_point<std::chrono::system_clock> texture_duration_end = std::chrono::system_clock::now();

View File

@ -56,7 +56,7 @@ private:
ComPtr<ID3D12Resource> m_backbuffer[2];
ComPtr<ID3D12DescriptorHeap> m_backbuffer_descriptor_heap[2];
// m_rootSignatures[N] is RS with N texture/sample
ComPtr<ID3D12RootSignature> m_root_signatures[17];
ComPtr<ID3D12RootSignature> m_root_signatures[17][17]; // indexed by [texture count][vertex count]
// TODO: Use a tree structure to parse more efficiently
data_cache m_texture_cache;
@ -67,7 +67,7 @@ private:
RSXVertexProgram m_vertex_program;
RSXFragmentProgram m_fragment_program;
PipelineStateObjectCache m_pso_cache;
std::tuple<ComPtr<ID3D12PipelineState>, std::vector<size_t>, size_t> m_current_pso;
std::tuple<ComPtr<ID3D12PipelineState>, size_t, size_t> m_current_pso;
struct
{
@ -115,11 +115,10 @@ private:
// Textures, constants, index and vertex buffers storage
data_heap m_buffer_data;
data_heap m_readback_resources;
ComPtr<ID3D12Resource> m_vertex_buffer_data;
rsx::render_targets m_rtts;
std::vector<D3D12_INPUT_ELEMENT_DESC> m_ia_set;
INT m_descriptor_stride_srv_cbv_uav;
INT m_descriptor_stride_dsv;
INT m_descriptor_stride_rtv;
@ -145,16 +144,15 @@ private:
* Non-native primitive types are emulated by index buffer expansion.
* Returns whether the draw call is indexed or not and the vertex count to draw.
*/
std::tuple<bool, size_t> upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list);
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> > upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list);
/**
* Upload all enabled vertex attributes for the vertices in the ranges described by vertex_ranges.
* A range in vertex_ranges is a pair whose first element is the index of the beginning of the
* range, and whose second element is the number of vertices in this range.
*/
std::vector<D3D12_VERTEX_BUFFER_VIEW> upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges);
std::tuple<D3D12_VERTEX_BUFFER_VIEW, size_t> upload_inlined_vertex_array();
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges,
gsl::not_null<ID3D12GraphicsCommandList*> command_list);
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(const std::vector<std::pair<u32, u32> > &vertex_ranges);

View File

@ -106,7 +106,7 @@ void resource_storage::init(ID3D12Device *device)
CHECK_HRESULT(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator.Get(), nullptr, IID_PPV_ARGS(command_list.GetAddressOf())));
CHECK_HRESULT(command_list->Close());
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 10000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 50000, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };
CHECK_HRESULT(device->CreateDescriptorHeap(&descriptor_heap_desc, IID_PPV_ARGS(&descriptors_heap)));
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = { D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER , 2048, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE };

View File

@ -241,7 +241,6 @@ void D3D12GSRender::load_program()
for (unsigned i = 0; i < prop.numMRT; i++)
prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask;
prop.IASet = m_ia_set;
if (!!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE])
{
rsx::index_array_type index_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4);

View File

@ -10,7 +10,6 @@ struct D3D12PipelineProperties
D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology;
DXGI_FORMAT DepthStencilFormat;
DXGI_FORMAT RenderTargetsFormat;
std::vector<D3D12_INPUT_ELEMENT_DESC> IASet;
D3D12_BLEND_DESC Blend;
unsigned numMRT : 3;
D3D12_DEPTH_STENCIL_DESC DepthStencil;
@ -19,23 +18,6 @@ struct D3D12PipelineProperties
bool operator==(const D3D12PipelineProperties &in) const
{
if (IASet.size() != in.IASet.size())
return false;
for (unsigned i = 0; i < IASet.size(); i++)
{
const D3D12_INPUT_ELEMENT_DESC &a = IASet[i], &b = in.IASet[i];
if (a.AlignedByteOffset != b.AlignedByteOffset)
return false;
if (a.Format != b.Format)
return false;
if (a.InputSlot != b.InputSlot)
return false;
if (a.InstanceDataStepRate != b.InstanceDataStepRate)
return false;
if (a.SemanticIndex != b.SemanticIndex)
return false;
}
if (memcmp(&DepthStencil, &in.DepthStencil, sizeof(D3D12_DEPTH_STENCIL_DESC)))
return false;
if (memcmp(&Blend, &in.Blend, sizeof(D3D12_BLEND_DESC)))
@ -93,7 +75,7 @@ public:
ComPtr<ID3DBlob> bytecode;
// For debugging
std::string content;
std::vector<size_t> vertex_shader_inputs;
size_t vertex_shader_input_count;
std::vector<size_t> FragmentConstantOffsetCache;
size_t m_textureCount;
@ -118,29 +100,11 @@ bool has_attribute(size_t attribute, const std::vector<D3D12_INPUT_ELEMENT_DESC>
return false;
}
static
std::vector<D3D12_INPUT_ELEMENT_DESC> completes_IA_desc(const std::vector<D3D12_INPUT_ELEMENT_DESC> &desc, const std::vector<size_t> &inputs)
{
std::vector<D3D12_INPUT_ELEMENT_DESC> result(desc);
for (size_t attribute : inputs)
{
if (has_attribute(attribute, desc))
continue;
D3D12_INPUT_ELEMENT_DESC extra_ia_desc = {};
extra_ia_desc.SemanticIndex = (UINT)attribute;
extra_ia_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
extra_ia_desc.SemanticName = "TEXCOORD";
extra_ia_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
result.push_back(extra_ia_desc);
}
return result;
}
struct D3D12Traits
{
using vertex_program_type = Shader;
using fragment_program_type = Shader;
using pipeline_storage_type = std::tuple<ComPtr<ID3D12PipelineState>, std::vector<size_t>, size_t>;
using pipeline_storage_type = std::tuple<ComPtr<ID3D12PipelineState>, size_t, size_t>;
using pipeline_properties = D3D12PipelineProperties;
static
@ -176,7 +140,7 @@ struct D3D12Traits
D3D12VertexProgramDecompiler VS(RSXVP);
std::string shaderCode = VS.Decompile();
vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX);
vertexProgramData.vertex_shader_inputs = VS.input_slots;
vertexProgramData.vertex_shader_input_count = RSXVP.rsx_vertex_inputs.size();
fs::file(fs::get_config_dir() + "VertexProgram" + std::to_string(ID) + ".hlsl", fom::rewrite).write(shaderCode);
vertexProgramData.id = (u32)ID;
}
@ -184,7 +148,7 @@ struct D3D12Traits
static
pipeline_storage_type build_pipeline(
const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties,
ID3D12Device *device, gsl::span<ComPtr<ID3D12RootSignature>, 17> root_signatures)
ID3D12Device *device, gsl::span<ComPtr<ID3D12RootSignature>, 17, 17> root_signatures)
{
std::tuple<ID3D12PipelineState *, std::vector<size_t>, size_t> result = {};
D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {};
@ -199,7 +163,7 @@ struct D3D12Traits
graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize();
graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer();
graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount].Get();
graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount][vertexProgramData.vertex_shader_input_count].Get();
graphicPipelineStateDesc.BlendState = pipelineProperties.Blend;
graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil;
@ -211,10 +175,6 @@ struct D3D12Traits
graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat;
graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat;
const std::vector<D3D12_INPUT_ELEMENT_DESC> &completed_IA_desc = completes_IA_desc(pipelineProperties.IASet, vertexProgramData.vertex_shader_inputs);
graphicPipelineStateDesc.InputLayout.pInputElementDescs = completed_IA_desc.data();
graphicPipelineStateDesc.InputLayout.NumElements = (UINT)completed_IA_desc.size();
graphicPipelineStateDesc.SampleDesc.Count = 1;
graphicPipelineStateDesc.SampleMask = UINT_MAX;
graphicPipelineStateDesc.NodeMask = 1;
@ -226,7 +186,7 @@ struct D3D12Traits
std::wstring name = L"PSO_" + std::to_wstring(vertexProgramData.id) + L"_" + std::to_wstring(fragmentProgramData.id);
pso->SetName(name.c_str());
return std::make_tuple(pso, vertexProgramData.vertex_shader_inputs, fragmentProgramData.m_textureCount);
return std::make_tuple(pso, vertexProgramData.vertex_shader_input_count, fragmentProgramData.m_textureCount);
}
};

View File

@ -52,20 +52,40 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS)
OS << "};" << std::endl;
}
namespace
{
	/**
	 * Declares the `<name>_buffer` resource of a shader input on register t<reg>,
	 * provided that an entry of `inputs` has the same location as the attribute.
	 * @param attribute (location, name) of the shader input.
	 * @param reg index of the t register to declare the resource on.
	 * @return true if a declaration was written to OS, false otherwise.
	 */
	bool declare_input(std::stringstream & OS, const std::tuple<size_t, std::string> &attribute, const std::vector<rsx_vertex_input> &inputs, size_t reg)
	{
		const size_t wanted_location = std::get<0>(attribute);

		bool is_fed = false;
		for (const auto &candidate : inputs)
		{
			if (static_cast<size_t>(candidate.location) == wanted_location)
			{
				is_fed = true;
				break;
			}
		}

		if (!is_fed)
			return false;

		// reg is a by-value parameter: the caller is the one advancing the register index.
		OS << "Texture1D<float4> " << std::get<1>(attribute) << "_buffer : register(t" << reg << ");\n";
		return true;
	}
}
// Writes the declarations of the vertex shader inputs to OS.
// As written, the body emits a `struct VertexInput` with one TEXCOORD member per
// input item (recording each location in input_slots), and then calls
// declare_input for every item, in ascending (location, name) order.
// NOTE(review): the struct members and the declare_input calls look like two
// alternative declaration schemes side by side; confirm both are meant to be emitted.
void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{
OS << "struct VertexInput" << std::endl;
OS << "{" << std::endl;
// (location, name) of every input item, gathered so they can be sorted below.
std::vector<std::tuple<size_t, std::string>> input_data;
for (const ParamType PT : inputs)
{
for (const ParamItem &PI : PT.items)
{
OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl;
input_slots.push_back(PI.location);
input_data.push_back(std::make_tuple(PI.location, PI.name));
}
}
OS << "};" << std::endl;
// Tuples compare on location first, so the items are visited by ascending location.
std::sort(input_data.begin(), input_data.end());
// The t register index only advances when declare_input reports a declaration,
// so the registers handed out are consecutive.
size_t t_register = 0;
for (const auto &attribute : input_data)
{
if (declare_input(OS, attribute, rsx_vertex_program.rsx_vertex_inputs, t_register))
t_register++;
}
}
void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
@ -140,9 +160,39 @@ static const reg_info reg_table[] =
{ "tc8", true, "dst_reg15", "", false },
};
namespace
{
void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector<rsx_vertex_input> &inputs)
{
for (const auto &real_input : inputs)
{
if (real_input.location != PI.location)
continue;
if (!real_input.is_array)
{
OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(0);\n";
return;
}
if (real_input.frequency > 1)
{
if (real_input.is_modulo)
{
OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id % " << real_input.frequency << ");\n";
return;
}
OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id / " << real_input.frequency << ");\n";
return;
}
OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n";
return;
}
OS << " float4 " << PI.name << " = float4(0., 0., 0., 1.);\n";
}
}
void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS)
{
OS << "PixelInput main(VertexInput In)" << std::endl;
OS << "PixelInput main(uint vertex_id : SV_VertexID)" << std::endl;
OS << "{" << std::endl;
// Declare inside main function
@ -162,7 +212,9 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS)
for (const ParamType PT : m_parr.params[PF_PARAM_IN])
{
for (const ParamItem &PI : PT.items)
OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl;
{
add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs);
}
}
}
@ -182,7 +234,7 @@ void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS)
}
D3D12VertexProgramDecompiler::D3D12VertexProgramDecompiler(const RSXVertexProgram &prog) :
VertexProgramDecompiler(prog)
VertexProgramDecompiler(prog), rsx_vertex_program(prog)
{
}
#endif

View File

@ -18,7 +18,8 @@ protected:
virtual void insertOutputs(std::stringstream &OS, const std::vector<ParamType> &outputs);
virtual void insertMainStart(std::stringstream &OS);
virtual void insertMainEnd(std::stringstream &OS);
const RSXVertexProgram &rsx_vertex_program;
public:
std::vector<size_t> input_slots;
D3D12VertexProgramDecompiler(const RSXVertexProgram &prog);
};