From ce21db2ce3bfd71f55762b846685868456cd195c Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Aug 2016 19:12:24 +0200 Subject: [PATCH 1/4] Tweak clang-format --- .clang-format | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.clang-format b/.clang-format index 6536f034e3..266b7859e8 100644 --- a/.clang-format +++ b/.clang-format @@ -1,11 +1,11 @@ Standard: Cpp11 UseTab: ForIndentation TabWidth: 1 -IndentWidth: 1 +IndentWidth: 1 AccessModifierOffset: -1 PointerAlignment: Left NamespaceIndentation: All -ColumnLimit: 0 +ColumnLimit: 100 BreakBeforeBraces: Allman BreakConstructorInitializersBeforeComma: true BreakBeforeBinaryOperators: false @@ -22,7 +22,7 @@ SortIncludes: false ReflowComments: true AlignConsecutiveAssignments: true AlignTrailingComments: true -AlignAfterOpenBracket: false +AlignAfterOpenBracket: DontAlign ConstructorInitializerAllOnOneLineOrOnePerLine: false BinPackArguments: true BinPackParameters: true From 03c86ae43b8461dfe2278c53a6bbc10e7fcff892 Mon Sep 17 00:00:00 2001 From: vlj Date: Sat, 27 Aug 2016 19:22:12 +0200 Subject: [PATCH 2/4] rsx: Move inline array to draw_clause structure. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 10 ++++++---- rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 7 +++++-- rpcs3/Emu/RSX/RSXThread.cpp | 8 +++++--- rpcs3/Emu/RSX/RSXThread.h | 2 -- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 7 +++++-- rpcs3/Emu/RSX/rsx_methods.cpp | 5 +++-- rpcs3/Emu/RSX/rsx_methods.h | 2 ++ 7 files changed, 26 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index e08f7c33c1..c07baf056f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -347,10 +347,12 @@ std::tuple> D3D12GSRe { size_t vertex_count; std::vector vertex_buffer_view; - std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array( - rsx::method_registers.vertex_arrays_info, - { (const gsl::byte*) inline_vertex_array.data(), ::narrow(inline_vertex_array.size() * sizeof(uint)) }, - m_buffer_data, m_vertex_buffer_data.Get(), command_list); + std::tie(vertex_buffer_view, vertex_count) = + upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info, + {(const gsl::byte*)rsx::method_registers.current_draw_clause.inline_vertex_array.data(), + ::narrow(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * + sizeof(uint))}, + m_buffer_data, m_vertex_buffer_data.Get(), command_list); if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) return std::make_tuple(false, vertex_count, vertex_buffer_view); diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 624d93d5b7..14846eab86 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -418,7 +418,9 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32 stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); } - u32 vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + u32 vertex_draw_count = + (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / + stride; m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) @@ -443,7 +445,8 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32 auto &texture = m_gl_attrib_buffers[index]; - u8 *src = reinterpret_cast(inline_vertex_array.data()); + u8* src = 
+ reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index cb35c5f7a8..00d7c7c075 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -327,7 +327,7 @@ namespace rsx void thread::begin() { - inline_vertex_array.clear(); + rsx::method_registers.current_draw_clause.inline_vertex_array.clear(); } void thread::end() @@ -521,11 +521,13 @@ namespace rsx void thread::write_inline_array_to_buffer(void *dst_buffer) { - u8* src = reinterpret_cast(inline_vertex_array.data()); + u8* src = + reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); u8* dst = (u8*)dst_buffer; size_t bytes_written = 0; - while (bytes_written < inline_vertex_array.size() * sizeof(u32)) + while (bytes_written < + rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) { for (int index = 0; index < rsx::limits::vertex_count; ++index) { diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 5f17179164..2e096924e3 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -183,8 +183,6 @@ namespace rsx u32 local_mem_addr, main_mem_addr; bool strict_ordering[0x1000]; - std::vector inline_vertex_array; - bool m_rtts_dirty; bool m_transform_constants_dirty; bool m_textures_dirty[16]; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 412176974e..8805fcb3fd 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -394,7 +394,9 @@ u32 VKGSRender::upload_inlined_array() stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); } - u32 vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; + u32 vertex_draw_count = + (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / + stride; for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -413,7 +415,8 @@ u32 VKGSRender::upload_inlined_array() const VkFormat format = vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size()); u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size); - u8 *src = reinterpret_cast(inline_vertex_array.data()); + u8* src = + reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); u8 *dst = static_cast(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size)); src += offsets[index]; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 2675e26d29..ff4946116a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -206,7 +206,7 @@ namespace rsx void draw_inline_array(thread* rsx, u32 _reg, u32 arg) { rsx::method_registers.current_draw_clause.command = rsx::draw_command::inlined_array; - rsx->inline_vertex_array.push_back(arg); + rsx::method_registers.current_draw_clause.inline_vertex_array.push_back(arg); } template @@ -269,7 +269,8 @@ namespace rsx rsx::method_registers.current_draw_clause.first_count_commands.push_back(std::make_pair(0, max_vertex_count)); } - if (!(rsx::method_registers.current_draw_clause.first_count_commands.empty() && rsxthr->inline_vertex_array.empty())) + if (!(rsx::method_registers.current_draw_clause.first_count_commands.empty() && + rsx::method_registers.current_draw_clause.inline_vertex_array.empty())) { 
rsxthr->end(); } diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 86b547dd28..a8a0d694ad 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -28,6 +28,8 @@ namespace rsx primitive_type primitive; draw_command command; + std::vector inline_vertex_array; + /** * Stores the first and count argument from draw/draw indexed parameters between begin/end clauses. */ From 8d54bcbc0dc1d13892308f49ae82e860cf6ee25f Mon Sep 17 00:00:00 2001 From: vlj Date: Sun, 28 Aug 2016 17:00:02 +0200 Subject: [PATCH 3/4] rsx: Use variant based draw commands. --- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 555 +++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 10 - rpcs3/Emu/RSX/GL/GLGSRender.h | 5 - rpcs3/Emu/RSX/GL/vertex_buffer.cpp | 409 +++++++++++--------- rpcs3/Emu/RSX/RSXThread.cpp | 23 ++ rpcs3/Emu/RSX/RSXThread.h | 25 ++ rpcs3/Emu/RSX/VK/VKGSRender.h | 18 - rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 408 +++++++++++--------- 8 files changed, 819 insertions(+), 634 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index c07baf056f..09043b5b62 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -80,166 +80,6 @@ namespace } } -namespace -{ - - struct vertex_buffer_visitor - { - std::vector vertex_buffer_views; - - vertex_buffer_visitor(u32 vtx_cnt, ID3D12GraphicsCommandList* cmdlst, ID3D12Resource* write_vertex_buffer, - d3d12_data_heap& heap) - : vertex_count(vtx_cnt) - , offset_in_vertex_buffers_buffer(0) - , m_buffer_data(heap) - , command_list(cmdlst) - , m_vertex_buffer_data(write_vertex_buffer) - { - } - - void operator()(const rsx::vertex_array_buffer& vertex_array) - { - u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); - UINT buffer_size = element_size * vertex_count; - size_t heap_offset = m_buffer_data.alloc(buffer_size); - - void* mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - gsl::span mapped_buffer_span = {(gsl::byte*)mapped_buffer, gsl::narrow_cast(buffer_size)}; - write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size); - - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - - command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size); - - vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_array.type, vertex_array.attribute_size, offset_in_vertex_buffers_buffer, buffer_size)); - offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 - - //m_timers.buffer_upload_size += buffer_size; - } - - void operator()(const rsx::vertex_array_register& vertex_register) - { - u32 element_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size); - UINT buffer_size = element_size; - size_t heap_offset = m_buffer_data.alloc(buffer_size); - - void* mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - memcpy(mapped_buffer, vertex_register.data.data(), buffer_size); - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - - command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, 
buffer_size); - - vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_register.type, vertex_register.attribute_size, offset_in_vertex_buffers_buffer, buffer_size)); - offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 - } - - void operator()(const rsx::empty_vertex_array& vbo) - { - } - - protected: - u32 vertex_count; - ID3D12GraphicsCommandList* command_list; - ID3D12Resource* m_vertex_buffer_data; - size_t offset_in_vertex_buffers_buffer; - d3d12_data_heap& m_buffer_data; - }; - -} // End anonymous namespace - -std::vector D3D12GSRender::upload_vertex_attributes( - const std::vector>& vertex_ranges, - gsl::not_null command_list) -{ - command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); - - u32 vertex_count = get_vertex_count(vertex_ranges); - //verify(HERE), rsx::method_registers.vertex_data_base_index() == 0; - - vertex_buffer_visitor visitor(vertex_count, command_list, m_vertex_buffer_data.Get(), m_buffer_data); - const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, vertex_ranges); - - for (const auto& vbo : vertex_buffers) - std::apply_visitor(visitor, vbo); - - command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER)); - return visitor.vertex_buffer_views; -} - -namespace -{ -std::tuple, size_t> upload_inlined_vertex_array( - gsl::span vertex_attribute_infos, - gsl::span inlined_array_raw_data, - d3d12_data_heap& ring_buffer_data, - ID3D12Resource* vertex_buffer_placement, - ID3D12GraphicsCommandList* command_list - ) -{ - // We can't rely on vertex_attribute_infos strides here so compute it - // assuming all attributes are packed - u32 stride = 0; - u32 initial_offsets[rsx::limits::vertex_count]; - u8 index = 0; - for (const auto &info : vertex_attribute_infos) - { - initial_offsets[index++] = stride; - - if (!info.size()) // disabled - continue; - - stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - u32 element_count = ::narrow(inlined_array_raw_data.size_bytes()) / stride; - std::vector result; - - UINT64 vertex_buffer_offset = 0; - index = 0; - for (const auto &info : vertex_attribute_infos) - { - if (!info.size()) - { - index++; - continue; - } - - u32 element_size = rsx::get_vertex_type_size_on_host(info.type(), info.size()); - UINT buffer_size = element_size * element_count; - size_t heap_offset = ring_buffer_data.alloc(buffer_size); - - void *mapped_buffer = ring_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - gsl::span dst = { (gsl::byte*)mapped_buffer, buffer_size }; - - for (u32 i = 0; i < element_count; i++) - { - auto subdst = dst.subspan(i * element_size, element_size); - auto subsrc = inlined_array_raw_data.subspan(initial_offsets[index] + (i * stride), element_size); - if (info.type() == rsx::vertex_base_type::ub && info.size() == 4) - { - subdst[0] = subsrc[3]; - subdst[1] = subsrc[2]; - subdst[2] = subsrc[1]; - subdst[3] = subsrc[0]; - } - else - { - std::copy(subsrc.begin(), subsrc.end(), subdst.begin()); - } - } - - ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - - command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset, ring_buffer_data.get_heap(), heap_offset, buffer_size); - - 
result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size)); - vertex_buffer_offset = get_next_multiple_of<48>(vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 - index++; - } - - return std::make_tuple(result, element_count); -} -} void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) { @@ -309,106 +149,335 @@ D3D12_CONSTANT_BUFFER_VIEW_DESC D3D12GSRender::upload_fragment_shader_constants( }; } - - - -std::tuple D3D12GSRender::generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges) +namespace { - size_t index_count = 0; - for (const auto &pair : vertex_ranges) - index_count += get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second); - // Alloc - size_t buffer_size = align(index_count * sizeof(u16), 64); - size_t heap_offset = m_buffer_data.alloc(buffer_size); - - void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - size_t first = 0; - for (const auto &pair : vertex_ranges) + struct vertex_buffer_visitor { - size_t element_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second); - write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, (u32)first, (u32)pair.second); - mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); - first += pair.second; - } - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_INDEX_BUFFER_VIEW index_buffer_view = { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - DXGI_FORMAT_R16_UINT - }; + std::vector vertex_buffer_views; - return std::make_tuple(index_buffer_view, index_count); -} - -std::tuple> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) -{ - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - size_t vertex_count; - std::vector vertex_buffer_view; - std::tie(vertex_buffer_view, vertex_count) = - upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info, - {(const gsl::byte*)rsx::method_registers.current_draw_clause.inline_vertex_array.data(), - ::narrow(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * - sizeof(uint))}, - m_buffer_data, m_vertex_buffer_data.Get(), command_list); - - if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) - return std::make_tuple(false, vertex_count, vertex_buffer_view); - - D3D12_INDEX_BUFFER_VIEW index_buffer_view; - size_t index_count; - std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } }); - command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count, vertex_buffer_view); - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) - { - if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) + vertex_buffer_visitor(u32 vtx_cnt, ID3D12GraphicsCommandList* cmdlst, + ID3D12Resource* write_vertex_buffer, d3d12_data_heap& heap) + : vertex_count(vtx_cnt), offset_in_vertex_buffers_buffer(0), m_buffer_data(heap), + command_list(cmdlst), m_vertex_buffer_data(write_vertex_buffer) { - size_t vertex_count = get_vertex_count(rsx::method_registers.current_draw_clause.first_count_commands); - return std::make_tuple(false, vertex_count, 
upload_vertex_attributes(rsx::method_registers.current_draw_clause.first_count_commands, command_list)); } - D3D12_INDEX_BUFFER_VIEW index_buffer_view; - size_t index_count; - std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array(rsx::method_registers.current_draw_clause.first_count_commands); - command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count, upload_vertex_attributes(rsx::method_registers.current_draw_clause.first_count_commands, command_list)); + void operator()(const rsx::vertex_array_buffer& vertex_array) + { + u32 element_size = + rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); + UINT buffer_size = element_size * vertex_count; + size_t heap_offset = + m_buffer_data.alloc(buffer_size); + + void* mapped_buffer = + m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + gsl::span mapped_buffer_span = { + (gsl::byte*)mapped_buffer, gsl::narrow_cast(buffer_size)}; + write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, + vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size); + + m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + + command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, + m_buffer_data.get_heap(), heap_offset, buffer_size); + + vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_array.type, + vertex_array.attribute_size, offset_in_vertex_buffers_buffer, buffer_size)); + offset_in_vertex_buffers_buffer = + get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 + + // m_timers.buffer_upload_size += buffer_size; + } + + void operator()(const rsx::vertex_array_register& vertex_register) + { + u32 element_size = rsx::get_vertex_type_size_on_host( + vertex_register.type, vertex_register.attribute_size); + UINT buffer_size = element_size; + size_t heap_offset = + m_buffer_data.alloc(buffer_size); + + void* mapped_buffer = + m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + memcpy(mapped_buffer, vertex_register.data.data(), buffer_size); + m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + + command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, + m_buffer_data.get_heap(), heap_offset, buffer_size); + + vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_register.type, + vertex_register.attribute_size, offset_in_vertex_buffers_buffer, buffer_size)); + offset_in_vertex_buffers_buffer = + get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 + } + + void operator()(const rsx::empty_vertex_array& vbo) + { + } + + protected: + u32 vertex_count; + ID3D12GraphicsCommandList* command_list; + ID3D12Resource* m_vertex_buffer_data; + size_t offset_in_vertex_buffers_buffer; + d3d12_data_heap& m_buffer_data; + }; + + std::tuple generate_index_buffer_for_emulated_primitives_array( + const std::vector>& vertex_ranges, d3d12_data_heap& m_buffer_data) + { + size_t index_count = std::accumulate( + vertex_ranges.begin(), vertex_ranges.end(), 0, [](size_t acc, const auto& pair) { + return acc + get_index_count( + rsx::method_registers.current_draw_clause.primitive, pair.second); + }); + + // Alloc + size_t buffer_size = align(index_count * sizeof(u16), 64); + size_t heap_offset = + m_buffer_data.alloc(buffer_size); + 
+ void* mapped_buffer = + m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + size_t first = 0; + for (const auto& pair : vertex_ranges) { + size_t element_count = + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second); + write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, + rsx::method_registers.current_draw_clause.primitive, (u32)first, (u32)pair.second); + mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); + first += pair.second; + } + m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size, + DXGI_FORMAT_R16_UINT}; + + return std::make_tuple(index_buffer_view, index_count); } - verify(HERE), rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed; + using attribute_storage = std::vector>; - // Index count - size_t index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, ::narrow(get_vertex_count(rsx::method_registers.current_draw_clause.first_count_commands))); + /** + * Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges. + * A range in vertex_range is a pair whose first element is the index of the beginning of the + * range, and whose second element is the number of vertex in this range. + */ + std::vector upload_vertex_attributes( + const std::vector>& vertex_ranges, + std::function>)> + get_vertex_buffers, + ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data, + gsl::not_null command_list) + { + command_list->ResourceBarrier(1, + &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data, + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); - rsx::index_array_type indexed_type = rsx::method_registers.index_type(); - size_t index_size = get_index_type_size(indexed_type); + u32 vertex_count = get_vertex_count(vertex_ranges); + verify(HERE), rsx::method_registers.vertex_data_base_index() == 0; - // Alloc - size_t buffer_size = align(index_count * index_size, 64); - size_t heap_offset = m_buffer_data.alloc(buffer_size); + vertex_buffer_visitor visitor( + vertex_count, command_list, m_vertex_buffer_data, m_buffer_data); + const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, vertex_ranges); - void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - u32 min_index, max_index; - gsl::span dst{ reinterpret_cast(mapped_buffer), ::narrow(buffer_size) }; + for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo); - std::tie(min_index, max_index) = write_index_array_data_to_buffer(dst, get_raw_index_array(rsx::method_registers.current_draw_clause.first_count_commands), - indexed_type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.first_count_commands, - [](auto prim) { return !is_primitive_native(prim); }); + command_list->ResourceBarrier(1, + &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data, + D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER)); + return visitor.vertex_buffer_views; + } - m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_INDEX_BUFFER_VIEW index_buffer_view = { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + 
heap_offset, - (UINT)buffer_size, - get_index_type(indexed_type) + std::tuple, size_t> upload_inlined_vertex_array( + gsl::span vertex_attribute_infos, + gsl::span inlined_array_raw_data, d3d12_data_heap& ring_buffer_data, + ID3D12Resource* vertex_buffer_placement, ID3D12GraphicsCommandList* command_list) + { + // We can't rely on vertex_attribute_infos strides here so compute it + // assuming all attributes are packed + u32 stride = 0; + u32 initial_offsets[rsx::limits::vertex_count]; + u8 index = 0; + for (const auto& info : vertex_attribute_infos) { + initial_offsets[index++] = stride; + + if (!info.size()) // disabled + continue; + + stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + + u32 element_count = ::narrow(inlined_array_raw_data.size_bytes()) / stride; + std::vector result; + + UINT64 vertex_buffer_offset = 0; + index = 0; + for (const auto& info : vertex_attribute_infos) { + if (!info.size()) { + index++; + continue; + } + + u32 element_size = rsx::get_vertex_type_size_on_host(info.type(), info.size()); + UINT buffer_size = element_size * element_count; + size_t heap_offset = + ring_buffer_data.alloc(buffer_size); + + void* mapped_buffer = + ring_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + gsl::span dst = {(gsl::byte*)mapped_buffer, buffer_size}; + + for (u32 i = 0; i < element_count; i++) { + auto subdst = dst.subspan(i * element_size, element_size); + auto subsrc = inlined_array_raw_data.subspan( + initial_offsets[index] + (i * stride), element_size); + if (info.type() == rsx::vertex_base_type::ub && info.size() == 4) { + subdst[0] = subsrc[3]; + subdst[1] = subsrc[2]; + subdst[2] = subsrc[1]; + subdst[3] = subsrc[0]; + } + else + { + std::copy(subsrc.begin(), subsrc.end(), subdst.begin()); + } + } + + ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + + command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset, + ring_buffer_data.get_heap(), heap_offset, buffer_size); + + result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size)); + vertex_buffer_offset = get_next_multiple_of<48>( + vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16 + index++; + } + + return std::make_tuple(result, element_count); + } + + struct draw_command_visitor + { + draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data, + ID3D12Resource* vertex_buffer_data, + std::function>&)> + get_vertex_info_lambda) + : command_list(cmd_list), m_buffer_data(buffer_data), + m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda) + { + } + + std::tuple> operator()( + const rsx::draw_array_command& command) + { + if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { + size_t vertex_count = get_vertex_count(command.indexes_range); + return std::make_tuple(false, vertex_count, + upload_vertex_attributes(command.indexes_range, get_vertex_buffers, + m_vertex_buffer_data, m_buffer_data, command_list)); + } + + D3D12_INDEX_BUFFER_VIEW index_buffer_view; + size_t index_count; + std::tie(index_buffer_view, index_count) = + generate_index_buffer_for_emulated_primitives_array( + command.indexes_range, m_buffer_data); + command_list->IASetIndexBuffer(&index_buffer_view); + return std::make_tuple(true, index_count, + upload_vertex_attributes(command.indexes_range, get_vertex_buffers, + m_vertex_buffer_data, m_buffer_data, command_list)); + } + + std::tuple> operator()( + const 
rsx::draw_indexed_array_command& command) + { + // Index count + size_t index_count = + get_index_count(rsx::method_registers.current_draw_clause.primitive, + ::narrow(get_vertex_count(command.ranges_to_fetch_in_index_buffer))); + + rsx::index_array_type indexed_type = rsx::method_registers.index_type(); + size_t index_size = get_index_type_size(indexed_type); + + // Alloc + size_t buffer_size = align(index_count * index_size, 64); + size_t heap_offset = + m_buffer_data.alloc(buffer_size); + + void* mapped_buffer = + m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + u32 min_index, max_index; + gsl::span dst{ + reinterpret_cast(mapped_buffer), ::narrow(buffer_size)}; + + std::tie(min_index, max_index) = + write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type, + rsx::method_registers.current_draw_clause.primitive, + rsx::method_registers.restart_index_enabled(), + rsx::method_registers.restart_index(), command.ranges_to_fetch_in_index_buffer, + [](auto prim) { return !is_primitive_native(prim); }); + + m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size, + get_index_type(indexed_type)}; + // m_timers.buffer_upload_size += buffer_size; + command_list->IASetIndexBuffer(&index_buffer_view); + + return std::make_tuple(true, index_count, + upload_vertex_attributes({std::make_pair(0, max_index + 1)}, get_vertex_buffers, + m_vertex_buffer_data, m_buffer_data, command_list)); + } + + std::tuple> operator()( + const rsx::draw_inlined_array& command) + { + size_t vertex_count; + std::vector vertex_buffer_view; + std::tie(vertex_buffer_view, vertex_count) = + upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info, + {(const gsl::byte*)command.inline_vertex_array.data(), + ::narrow(command.inline_vertex_array.size() * sizeof(uint))}, + m_buffer_data, m_vertex_buffer_data, command_list); + + if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) + return std::make_tuple(false, vertex_count, vertex_buffer_view); + + D3D12_INDEX_BUFFER_VIEW index_buffer_view; + size_t index_count; + std::tie(index_buffer_view, index_count) = + generate_index_buffer_for_emulated_primitives_array( + {{0, (u32)vertex_count}}, m_buffer_data); + command_list->IASetIndexBuffer(&index_buffer_view); + return std::make_tuple(true, index_count, vertex_buffer_view); + } + + private: + ID3D12GraphicsCommandList* command_list; + d3d12_data_heap& m_buffer_data; + std::function>&)> + get_vertex_buffers; + ID3D12Resource* m_vertex_buffer_data; }; - m_timers.buffer_upload_size += buffer_size; - command_list->IASetIndexBuffer(&index_buffer_view); +} // End anonymous namespace - return std::make_tuple(true, index_count, upload_vertex_attributes({ std::make_pair(0, max_index + 1) }, command_list)); +std::tuple> +D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* command_list) +{ + return std::apply_visitor( + draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(), + [this]( + const auto& state, const auto& list) { return get_vertex_buffers(state, list); }), + get_draw_command(rsx::method_registers)); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 3c0ef751ad..f7de4266b0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -141,16 +141,6 @@ private: */ std::tuple > 
upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); - /** - * Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges. - * A range in vertex_range is a pair whose first element is the index of the beginning of the - * range, and whose second element is the number of vertex in this range. - */ - std::vector upload_vertex_attributes(const std::vector > &vertex_ranges, - gsl::not_null command_list); - - std::tuple generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges); - void upload_and_bind_scale_offset_matrix(size_t descriptor_index); void upload_and_bind_vertex_shader_constants(size_t descriptor_index); D3D12_CONSTANT_BUFFER_VIEW_DESC upload_fragment_shader_constants(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 0ece01e1c4..5bd46e4f85 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -60,11 +60,6 @@ private: // Return element to draw and in case of indexed draw index type and offset in index buffer std::tuple > > set_vertex_buffer(); - void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, const u32& texture_index_offset); - - // Returns vertex count - u32 upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset); - public: bool load_program(); void init_buffers(bool skip_reading = false); diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 14846eab86..b4f58a9a89 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -196,100 +196,12 @@ namespace } throw; } -} -std::tuple > > GLGSRender::set_vertex_buffer() -{ - //initialize vertex attributes - //merge all vertex arrays - static const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; - - std::chrono::time_point then = std::chrono::system_clock::now(); - - u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); - u32 min_index = 0, max_index = 0; - u32 max_vertex_attrib_size = 0; - u32 vertex_or_index_count; - - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) - { - if (rsx::method_registers.vertex_arrays_info[index].size() || rsx::method_registers.register_vertex_info[index].size) - { - max_vertex_attrib_size += 16; - } - } - - std::optional > index_info; - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) - { - rsx::index_array_type type = rsx::method_registers.index_type(); - u32 type_size = ::narrow(get_index_type_size(type)); - - vertex_or_index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.get_elements_count()); - - u32 max_size = vertex_or_index_count * type_size; - auto mapping = m_index_ring_buffer.alloc_and_map(max_size); - void *ptr = mapping.first; - u32 offset_in_index_buffer = mapping.second; - - std::tie(min_index, max_index, vertex_or_index_count) = upload_index_buffer(get_raw_index_array(rsx::method_registers.current_draw_clause.first_count_commands), ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_or_index_count); - min_index = 0; // we must keep index to vertex mapping - m_index_ring_buffer.unmap(); - index_info = std::make_tuple(get_index_type(type), offset_in_index_buffer); - } - else - { - u32 vertex_count; - if 
(rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - // We need to go through array to determine vertex count so upload it - vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset); - } - else - { - assert(rsx::method_registers.current_draw_clause.command == rsx::draw_command::array); - vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); - min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; - max_index = vertex_count - 1 + min_index; - } - - if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) - { - u32 offset_in_index_buffer; - std::tie(vertex_or_index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); - index_info = std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer); - } - else - { - vertex_or_index_count = vertex_count; - } - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - // Already uploaded when determining vertex count, we can return here - return std::make_tuple(vertex_or_index_count, index_info); - } - - upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); - - std::chrono::time_point now = std::chrono::system_clock::now(); - m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); - - return std::make_tuple(vertex_or_index_count, index_info); -} - -namespace -{ struct vertex_buffer_visitor { - vertex_buffer_visitor(u32 vtx_cnt, - u32 texture_idx_offset, - gl::ring_buffer& heap, gl::glsl::program* prog, - gl::texture* attrib_buffer, - u32 min_texbuffer_offset) + vertex_buffer_visitor(u32 vtx_cnt, u32 texture_idx_offset, gl::ring_buffer& heap, + gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset) : vertex_count(vtx_cnt) , m_attrib_ring_info(heap) , m_program(prog) @@ -338,8 +250,8 @@ namespace case rsx::vertex_base_type::f: { const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size); - const u32 gl_type = to_gl_internal_type(vertex_register.type, vertex_register.attribute_size); - const size_t data_size = element_size; + const u32 gl_type = to_gl_internal_type(vertex_register.type, vertex_register.attribute_size); + const u32 data_size = element_size; auto& texture = m_gl_attrib_buffers[vertex_register.index]; @@ -378,103 +290,232 @@ namespace GLint m_min_texbuffer_alignment; }; -} // End anonymous namespace - -void GLGSRender::upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, const u32& texture_index_offset) -{ - u32 verts_allocated = max_index - min_index + 1; - __glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size); - // Disable texture then reenable them - // Is it really necessary ? 
- for (int index = 0; index < rsx::limits::vertex_count; ++index) + struct draw_command_visitor { - int location; - if (!m_program->uniforms.has_location(s_reg_table[index], &location)) - continue; + using attribute_storage = std::vector< + std::variant>; - glActiveTexture(GL_TEXTURE0 + index + texture_index_offset); - glBindTexture(GL_TEXTURE_BUFFER, 0); - glProgramUniform1i(m_program->id(), location, index + texture_index_offset); - continue; - } - vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer, m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment); - const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, { {min_index, verts_allocated } }); - for (const auto& vbo : vertex_buffers) - std::apply_visitor(visitor, vbo); - m_attrib_ring_buffer.unmap(); -} - -u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset) -{ - u32 stride = 0; - u32 offsets[rsx::limits::vertex_count] = { 0 }; - - for (u32 i = 0; i < rsx::limits::vertex_count; ++i) - { - const auto &info = rsx::method_registers.vertex_arrays_info[i]; - if (!info.size()) continue; - - offsets[i] = stride; - stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); - } - - u32 vertex_draw_count = - (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / - stride; - m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); - - for (int index = 0; index < rsx::limits::vertex_count; ++index) - { - auto &vertex_info = rsx::method_registers.vertex_arrays_info[index]; - - int location; - if (!m_program->uniforms.has_location(s_reg_table[index], &location)) - continue; - - if (!vertex_info.size()) // disabled, bind a null sampler + draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer, + gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment, + std::function>)> gvb) + : m_index_ring_buffer(index_ring_buffer) + , m_attrib_ring_buffer(attrib_ring_buffer) + , m_gl_attrib_buffers(gl_attrib_buffers) + , m_program(program) + , m_min_texbuffer_alignment(min_texbuffer_alignment) + , get_vertex_buffers(gvb) { - glActiveTexture(GL_TEXTURE0 + index + texture_index_offset); - glBindTexture(GL_TEXTURE_BUFFER, 0); - glProgramUniform1i(m_program->id(), location, index + texture_index_offset); - continue; - } - - const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size()); - u32 data_size = element_size * vertex_draw_count; - u32 gl_type = to_gl_internal_type(vertex_info.type(), vertex_info.size()); - - auto &texture = m_gl_attrib_buffers[index]; - - u8* src = - reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); - auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); - u8 *dst = static_cast(mapping.first); - - src += offsets[index]; - prepare_buffer_for_writing(dst, vertex_info.type(), vertex_info.size(), vertex_draw_count); - - //TODO: properly handle compressed data - for (u32 i = 0; i < vertex_draw_count; ++i) - { - if (vertex_info.type() == rsx::vertex_base_type::ub && vertex_info.size() == 4) - { - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { + if (rsx::method_registers.vertex_arrays_info[index].size() || + rsx::method_registers.register_vertex_info[index].size) + { + max_vertex_attrib_size += 
16; + } } - else - memcpy(dst, src, element_size); - - src += stride; - dst += element_size; } - texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); + std::tuple>> operator()( + const rsx::draw_array_command& command) + { + u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + u32 max_index = vertex_count - 1 + min_index; - //Link texture to uniform - m_program->uniforms.texture(location, index + texture_index_offset, texture); - m_attrib_ring_buffer.unmap(); - } - return vertex_draw_count; + if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { + u32 index_count; + u32 offset_in_index_buffer; + std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( + rsx::method_registers.current_draw_clause.first_count_commands, + rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); + + upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); + + // std::chrono::time_point now = std::chrono::system_clock::now(); + // m_vertex_upload_time += std::chrono::duration_cast(now - + //then).count(); + + return std::make_tuple(index_count, + std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); + } + + upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); + + // std::chrono::time_point now = std::chrono::system_clock::now(); + // m_vertex_upload_time += std::chrono::duration_cast(now - + //then).count(); + + return std::make_tuple(vertex_count, std::optional>()); + } + + std::tuple>> operator()( + const rsx::draw_indexed_array_command& command) + { + u32 min_index = 0, max_index = 0; + u32 max_vertex_attrib_size = 0; + + rsx::index_array_type type = rsx::method_registers.index_type(); + u32 type_size = ::narrow(get_index_type_size(type)); + + u32 index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, + rsx::method_registers.current_draw_clause.get_elements_count()); + + u32 max_size = index_count * type_size; + auto mapping = m_index_ring_buffer.alloc_and_map(max_size); + void* ptr = mapping.first; + u32 offset_in_index_buffer = mapping.second; + + u32 expanded_index_count; + std::tie(min_index, max_index, expanded_index_count) = upload_index_buffer( + command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, + rsx::method_registers.current_draw_clause.first_count_commands, index_count); + min_index = 0; // we must keep index to vertex mapping + m_index_ring_buffer.unmap(); + + upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); + + return std::make_tuple( + expanded_index_count, std::make_tuple(get_index_type(type), offset_in_index_buffer)); + } + + std::tuple>> operator()( + const rsx::draw_inlined_array& command) + { + // We need to go through array to determine vertex count so upload it + u32 vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset); + + if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { + u32 offset_in_index_buffer; + u32 index_count; + std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( + rsx::method_registers.current_draw_clause.first_count_commands, + rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); + return std::make_tuple(index_count, + 
std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); + } + return std::make_tuple(vertex_count, std::optional>()); + } + + private: + const u32 texture_index_offset = + rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; + u32 max_vertex_attrib_size = 0; + gl::ring_buffer& m_index_ring_buffer; + gl::ring_buffer& m_attrib_ring_buffer; + gl::texture* m_gl_attrib_buffers; + + gl::glsl::program* m_program; + GLint m_min_texbuffer_alignment; + std::function>)> + get_vertex_buffers; + + void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, + const u32& texture_index_offset) + { + u32 verts_allocated = max_index - min_index + 1; + __glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size); + // Disable texture then reenable them + // Is it really necessary ? + for (int index = 0; index < rsx::limits::vertex_count; ++index) { + int location; + if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue; + + glActiveTexture(GL_TEXTURE0 + index + texture_index_offset); + glBindTexture(GL_TEXTURE_BUFFER, 0); + glProgramUniform1i(m_program->id(), location, index + texture_index_offset); + continue; + } + vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer, + m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment); + const auto& vertex_buffers = + get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}}); + for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo); + m_attrib_ring_buffer.unmap(); + } + + u32 upload_inline_array(const u32& max_vertex_attrib_size, const u32& texture_index_offset) + { + u32 stride = 0; + u32 offsets[rsx::limits::vertex_count] = {0}; + + for (u32 i = 0; i < rsx::limits::vertex_count; ++i) { + const auto& info = rsx::method_registers.vertex_arrays_info[i]; + if (!info.size()) continue; + + offsets[i] = stride; + stride += rsx::get_vertex_type_size_on_host(info.type(), info.size()); + } + + u32 vertex_draw_count = + (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / + stride; + m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); + + for (int index = 0; index < rsx::limits::vertex_count; ++index) { + auto& vertex_info = rsx::method_registers.vertex_arrays_info[index]; + + int location; + if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue; + + if (!vertex_info.size()) // disabled, bind a null sampler + { + glActiveTexture(GL_TEXTURE0 + index + texture_index_offset); + glBindTexture(GL_TEXTURE_BUFFER, 0); + glProgramUniform1i(m_program->id(), location, index + texture_index_offset); + continue; + } + + const u32 element_size = + rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size()); + u32 data_size = element_size * vertex_draw_count; + u32 gl_type = to_gl_internal_type(vertex_info.type(), vertex_info.size()); + + auto& texture = m_gl_attrib_buffers[index]; + + u8* src = + reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); + auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); + u8* dst = static_cast(mapping.first); + + src += offsets[index]; + prepare_buffer_for_writing(dst, vertex_info.type(), vertex_info.size(), vertex_draw_count); + + // TODO: properly handle compressed data + for (u32 i = 0; i < vertex_draw_count; ++i) { + if (vertex_info.type() == rsx::vertex_base_type::ub && 
vertex_info.size() == 4) { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + } + else + memcpy(dst, src, element_size); + + src += stride; + dst += element_size; + } + + texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); + + // Link texture to uniform + m_program->uniforms.texture(location, index + texture_index_offset, texture); + m_attrib_ring_buffer.unmap(); + } + return vertex_draw_count; + } + }; } + +std::tuple>> GLGSRender::set_vertex_buffer() +{ + std::chrono::time_point then = std::chrono::system_clock::now(); + return std::apply_visitor(draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer, + m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment, + [this](const auto& state, const auto& list) { + return this->get_vertex_buffers(state, list); + }), + get_draw_command(rsx::method_registers)); +} + +namespace +{ +} // End anonymous namespace diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 00d7c7c075..283011c62e 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -636,6 +636,29 @@ namespace rsx return result; } + std::variant + thread::get_draw_command(const rsx::rsx_state& state) const + { + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) { + return draw_array_command{ + rsx::method_registers.current_draw_clause.first_count_commands}; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) { + return draw_indexed_array_command{ + rsx::method_registers.current_draw_clause.first_count_commands, + get_raw_index_array( + rsx::method_registers.current_draw_clause.first_count_commands)}; + } + + if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) { + return draw_inlined_array{ + rsx::method_registers.current_draw_clause.inline_vertex_array}; + } + + fmt::throw_exception("ill-formed draw command" HERE); + } + void thread::do_internal_task() { if (m_internal_tasks.empty()) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 2e096924e3..213fafde71 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -139,6 +139,29 @@ namespace rsx u8 index; }; + struct draw_array_command + { + /** + * First and count of index subranges. + */ + std::vector> indexes_range; + }; + + struct draw_indexed_array_command + { + /** + * First and count of subranges to fetch in index buffer. 
+ */ + std::vector> ranges_to_fetch_in_index_buffer; + + gsl::span raw_index_buffer; + }; + + struct draw_inlined_array + { + std::vector inline_vertex_array; + }; + class thread : public named_thread { std::shared_ptr m_vblank_thread; @@ -231,6 +254,8 @@ namespace rsx gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; std::vector> get_vertex_buffers(const rsx::rsx_state& state, const std::vector>& vertex_ranges) const; + std::variant + get_draw_command(const rsx::rsx_state& state) const; private: std::mutex m_mtx_task; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 8f2ec08049..be081c8262 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -87,24 +87,6 @@ private: void prepare_rtts(); /// returns primitive topology, is_indexed, index_count, offset in index buffer, index type std::tuple > > upload_vertex_data(); - - void upload_vertex_buffers(u32 min_index, u32 vertex_max_index); - - /// returns number of vertex drawn - u32 upload_inlined_array(); - - /** - * Upload index (and expands it if primitive type is not natively supported). - * Returns min index, max index, index_count, and (offset_in_index_buffer, index_type) - */ - std::tuple> upload_index_buffer(const rsx::draw_clause &clause); - - /** - * Creates and fills an index buffer emulating unsupported primitive type. - * Returns index_count and (offset_in_index_buffer, index_type) - */ - std::tuple > generate_emulating_index_buffer(const rsx::draw_clause &clause, u32 vertex_count); - public: bool load_program(); void init_buffers(bool skip_reading = false); diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 8805fcb3fd..18bd0b1741 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -217,84 +217,47 @@ namespace "in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer", "in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer" }; -} -std::tuple > > -VKGSRender::upload_vertex_data() -{ - u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); - u32 min_index, max_index; - - bool is_indexed_draw = (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed); - u32 index_count = 0; - - std::optional > index_info; - - if (is_indexed_draw) + /** + * Creates and fills an index buffer emulating unsupported primitive type. 
+ * Returns index_count and (offset_in_index_buffer, index_type) + */ + std::tuple> generate_emulating_index_buffer( + const rsx::draw_clause& clause, u32 vertex_count, + vk::vk_data_heap& m_index_buffer_ring_info) { - std::tie(min_index, max_index, index_count, index_info) = upload_index_buffer(rsx::method_registers.current_draw_clause); - min_index = 0; // We need correct index mapping + u32 index_count = get_index_count(clause.primitive, vertex_count); + u32 upload_size = index_count * sizeof(u16); + + VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size); + void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size); + + write_index_array_for_non_indexed_non_native_primitive_to_buffer( + reinterpret_cast(buf), clause.primitive, 0, vertex_count); + + m_index_buffer_ring_info.unmap(); + return std::make_tuple( + index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16)); } - bool primitives_emulated = false; - VkPrimitiveTopology prims = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitives_emulated); - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array) - { - if (primitives_emulated) - { - std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, rsx::method_registers.current_draw_clause.get_elements_count()); - } - else - { - index_count = rsx::method_registers.current_draw_clause.get_elements_count(); - } - min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; - max_index = rsx::method_registers.current_draw_clause.get_elements_count() + min_index; - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array) - { - index_count = upload_inlined_array(); - - if (primitives_emulated) - { - std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, index_count); - } - } - - if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array || rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed) - { - upload_vertex_buffers(min_index, max_index); - } - - return std::make_tuple(prims, index_count, index_info); -} - -namespace -{ struct vertex_buffer_visitor { - vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, - vk::vk_data_heap& heap, vk::glsl::program* prog, - VkDescriptorSet desc_set, - std::vector>& buffer_view_to_clean) - : vertex_count(vtx_cnt) - , m_attrib_ring_info(heap) - , device(dev) - , m_program(prog) - , descriptor_sets(desc_set) - , m_buffer_view_to_clean(buffer_view_to_clean) + vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap, + vk::glsl::program* prog, VkDescriptorSet desc_set, + std::vector>& buffer_view_to_clean) + : vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog), + descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean) { } void operator()(const rsx::vertex_array_buffer& vertex_array) { // Fill vertex_array - u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); + u32 element_size = + rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size); u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size); - u32 upload_size = real_element_size * vertex_count; + u32 upload_size = real_element_size * vertex_count; bool requires_expansion = 
 vk::requires_component_expansion(vertex_array.type, vertex_array.attribute_size);
 VkDeviceSize offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(upload_size);
@@ -317,14 +280,16 @@ namespace
 {
 case rsx::vertex_base_type::f:
 {
- size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
+ size_t data_size = rsx::get_vertex_type_size_on_host(
+ vertex_register.type, vertex_register.attribute_size);
 const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);
 u32 offset_in_attrib_buffer = 0;
 if (vk::requires_component_expansion(vertex_register.type, vertex_register.attribute_size))
 {
- const u32 num_stored_verts = static_cast<u32>(data_size / (sizeof(float) * vertex_register.attribute_size));
+ const u32 num_stored_verts = static_cast<u32>(
+ data_size / (sizeof(float) * vertex_register.attribute_size));
 const u32 real_element_size = vk::get_suitable_vk_size(vertex_register.type, vertex_register.attribute_size);
 data_size = real_element_size * num_stored_verts;
@@ -370,125 +335,220 @@ namespace
 std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
 };
-} // End anonymous namespace
+ using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer, rsx::vertex_array_register, rsx::empty_vertex_array>>;
-void VKGSRender::upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
-{
- vertex_buffer_visitor visitor(vertex_max_index - min_index + 1, *m_device, m_attrib_ring_info, m_program, descriptor_sets, m_buffer_view_to_clean);
- const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
- for (const auto& vbo : vertex_buffers)
- std::apply_visitor(visitor, vbo);
-}
-
-u32 VKGSRender::upload_inlined_array()
-{
- u32 stride = 0;
- u32 offsets[rsx::limits::vertex_count] = { 0 };
-
- for (u32 i = 0; i < rsx::limits::vertex_count; ++i)
+ struct draw_command_visitor
 {
- const auto &info = rsx::method_registers.vertex_arrays_info[i];
- if (!info.size()) continue;
+ using result_type = std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>;
- offsets[i] = stride;
- stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
- }
-
- u32 vertex_draw_count =
- (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) /
- stride;
-
- for (int index = 0; index < rsx::limits::vertex_count; ++index)
- {
- auto &vertex_info = rsx::method_registers.vertex_arrays_info[index];
-
- if (!m_program->has_uniform(s_reg_table[index]))
- continue;
-
- if (!vertex_info.size()) // disabled
+ draw_command_visitor(VkDevice device, vk::vk_data_heap& index_buffer_ring_info,
+ vk::vk_data_heap& attrib_ring_info, vk::glsl::program* program,
+ VkDescriptorSet descriptor_sets,
+ std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
+ std::function<attribute_storage(const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
+ get_vertex_buffers_f)
+ : m_device(device), m_index_buffer_ring_info(index_buffer_ring_info),
+ m_attrib_ring_info(attrib_ring_info), m_program(program),
+ m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean),
+ get_vertex_buffers(get_vertex_buffers_f)
 {
- continue;
 }
- const u32 element_size = vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
- const u32 data_size = element_size * vertex_draw_count;
- const VkFormat format = vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());
-
- u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
- u8* src =
- reinterpret_cast<u8*>(rsx::method_registers.current_draw_clause.inline_vertex_array.data());
- u8 *dst = static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));
-
- src += offsets[index];
- u8 opt_size = vertex_info.size();
-
- if (vertex_info.size() == 3)
- opt_size = 4;
-
- //TODO: properly handle cmp type
- if (vertex_info.type() == rsx::vertex_base_type::cmp)
- LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");
-
- switch (vertex_info.type())
+ result_type operator()(const rsx::draw_array_command& command)
 {
- case rsx::vertex_base_type::f:
- vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
- break;
- case rsx::vertex_base_type::sf:
- vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
- break;
- case rsx::vertex_base_type::s1:
- case rsx::vertex_base_type::ub:
- case rsx::vertex_base_type::ub256:
- vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
- break;
- case rsx::vertex_base_type::s32k:
- case rsx::vertex_base_type::cmp:
- vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
- break;
- default:
- fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
+ bool primitives_emulated = false;
+ VkPrimitiveTopology prims = vk::get_appropriate_topology(
+ rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
+ u32 index_count = 0;
+ std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
+
+ u32 min_index =
+ rsx::method_registers.current_draw_clause.first_count_commands.front().first;
+ u32 max_index =
+ rsx::method_registers.current_draw_clause.get_elements_count() + min_index;
+
+ if (primitives_emulated) {
+ std::tie(index_count, index_info) =
+ generate_emulating_index_buffer(rsx::method_registers.current_draw_clause,
+ max_index - min_index + 1, m_index_buffer_ring_info);
+ }
+ else
+ {
+ index_count = rsx::method_registers.current_draw_clause.get_elements_count();
+ }
+
+ upload_vertex_buffers(min_index, max_index);
+ return std::make_tuple(prims, index_count, index_info);
 }
- m_attrib_ring_info.unmap();
- m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
- m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
- }
+ result_type operator()(const rsx::draw_indexed_array_command& command)
+ {
+ bool primitives_emulated = false;
+ VkPrimitiveTopology prims = vk::get_appropriate_topology(
+ rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
- return vertex_draw_count;
+ rsx::index_array_type index_type = rsx::method_registers.index_type();
+ u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
+
+ u32 index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive,
+ rsx::method_registers.current_draw_clause.get_elements_count());
+ u32 upload_size = index_count * type_size;
+
+ VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
+ void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
+
+ /**
+ * Upload index (and expands it if primitive type is not natively supported).
+ */
+ u32 min_index, max_index;
+ std::tie(min_index, max_index) = write_index_array_data_to_buffer(
+ gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), index_count * type_size),
+ command.raw_index_buffer, index_type,
+ rsx::method_registers.current_draw_clause.primitive,
+ rsx::method_registers.restart_index_enabled(),
+ rsx::method_registers.restart_index(), command.ranges_to_fetch_in_index_buffer,
+ [](auto prim) { return !is_primitive_native(prim); });
+
+ m_index_buffer_ring_info.unmap();
+
+ std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
+ std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));
+
+ upload_vertex_buffers(0, max_index);
+ return std::make_tuple(prims, index_count, index_info);
+ }
+
+ result_type operator()(const rsx::draw_inlined_array& command)
+ {
+ bool primitives_emulated = false;
+ VkPrimitiveTopology prims = vk::get_appropriate_topology(
+ rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
+ u32 index_count = upload_inlined_array();
+
+ if (!primitives_emulated) {
+ return std::make_tuple(prims, index_count, std::nullopt);
+ }
+
+ std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
+ std::tie(index_count, index_info) = generate_emulating_index_buffer(
+ rsx::method_registers.current_draw_clause, index_count, m_index_buffer_ring_info);
+ return std::make_tuple(prims, index_count, index_info);
+ }
+
+ private:
+ vk::vk_data_heap& m_index_buffer_ring_info;
+ VkDevice m_device;
+ vk::vk_data_heap& m_attrib_ring_info;
+ vk::glsl::program* m_program;
+ VkDescriptorSet m_descriptor_sets;
+ std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
+ std::function<attribute_storage(const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
+ get_vertex_buffers;
+
+ void upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
+ {
+ vertex_buffer_visitor visitor(vertex_max_index - min_index + 1, m_device,
+ m_attrib_ring_info, m_program, m_descriptor_sets, m_buffer_view_to_clean);
+ const auto& vertex_buffers = get_vertex_buffers(
+ rsx::method_registers, {{min_index, vertex_max_index - min_index + 1}});
+ for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
+ }
+
+ u32 upload_inlined_array()
+ {
+ u32 stride = 0;
+ u32 offsets[rsx::limits::vertex_count] = {0};
+
+ for (u32 i = 0; i < rsx::limits::vertex_count; ++i) {
+ const auto& info = rsx::method_registers.vertex_arrays_info[i];
+ if (!info.size()) continue;
+
+ offsets[i] = stride;
+ stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
+ }
+
+ u32 vertex_draw_count =
+ (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() *
+ sizeof(u32)) /
+ stride;
+
+ for (int index = 0; index < rsx::limits::vertex_count; ++index) {
+ auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
+
+ if (!m_program->has_uniform(s_reg_table[index])) continue;
+
+ if (!vertex_info.size()) // disabled
+ {
+ continue;
+ }
+
+ const u32 element_size =
+ vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
+ const u32 data_size = element_size * vertex_draw_count;
+ const VkFormat format =
+ vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());
+
+ u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
+ u8* src = reinterpret_cast<u8*>(
+ rsx::method_registers.current_draw_clause.inline_vertex_array.data());
+ u8* dst =
+ static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));
+
+ src += offsets[index];
+ u8 opt_size = vertex_info.size();
+
+ if (vertex_info.size() == 3) opt_size = 4;
+
+ // TODO: properly handle cmp type
+ if (vertex_info.type() == rsx::vertex_base_type::cmp)
+ LOG_ERROR(
+ RSX, "Compressed vertex attributes not supported for inlined arrays yet");
+
+ switch (vertex_info.type())
+ {
+ case rsx::vertex_base_type::f:
+ vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count,
+ vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
+ break;
+ case rsx::vertex_base_type::sf:
+ vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count,
+ vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
+ break;
+ case rsx::vertex_base_type::s1:
+ case rsx::vertex_base_type::ub:
+ case rsx::vertex_base_type::ub256:
+ vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count,
+ vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
+ break;
+ case rsx::vertex_base_type::s32k:
+ case rsx::vertex_base_type::cmp:
+ vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count,
+ vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
+ break;
+ default: fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
+ }
+
+ m_attrib_ring_info.unmap();
+ m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device,
+ m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
+ m_program->bind_uniform(
+ m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
+ }
+
+ return vertex_draw_count;
+ }
+ };
 }
-std::tuple<u32, u32, u32, std::tuple<VkDeviceSize, VkIndexType>> VKGSRender::upload_index_buffer(const rsx::draw_clause &clause)
+std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>
+VKGSRender::upload_vertex_data()
 {
- rsx::index_array_type index_type = rsx::method_registers.index_type();
- u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
-
- u32 index_count = get_index_count(clause.primitive, clause.get_elements_count());
- u32 upload_size = index_count * type_size;
-
- VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
- void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
-
- u32 min_index, max_index;
- std::tie(min_index, max_index) = write_index_array_data_to_buffer(gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), index_count * type_size), get_raw_index_array(clause.first_count_commands),
- index_type, clause.primitive, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), clause.first_count_commands,
- [](auto prim) { return !is_primitive_native(prim); });
-
- m_index_buffer_ring_info.unmap();
- return std::make_tuple(min_index, max_index, index_count, std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)));
-}
-
-
-std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType> > VKGSRender::generate_emulating_index_buffer(const rsx::draw_clause &clause, u32 vertex_count)
-{
- u32 index_count = get_index_count(clause.primitive, vertex_count);
- u32 upload_size = index_count * sizeof(u16);
-
- VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
- void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
-
- write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast<char*>(buf), clause.primitive, 0, vertex_count);
-
- m_index_buffer_ring_info.unmap();
- return std::make_tuple(index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16));
+ draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
+ descriptor_sets, m_buffer_view_to_clean,
+ [this](const auto& state, const auto& range) { return get_vertex_buffers(state, range); });
+ return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
 }
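
The decoders added in the following patch unpack the NV4097_DRAW_ARRAYS / NV4097_DRAW_INDEX_ARRAY method argument, which packs a 24-bit start value and an 8-bit count-minus-one into a single 32-bit word (the same layout handled by the "arg & 0xffffff" / "(arg >> 24) + 1" code being removed from rsx_methods.cpp). A minimal stand-alone sketch of that decoding; draw_range and decode_draw_arg are hypothetical names used only for illustration and are not part of the patch:

#include <cstdint>

// Hypothetical helper mirroring the NV4097_DRAW_ARRAYS / NV4097_DRAW_INDEX_ARRAY
// argument layout: bits [0,24) hold start, bits [24,32) hold count - 1.
struct draw_range { uint32_t start; uint16_t count; };

inline draw_range decode_draw_arg(uint32_t arg)
{
    return { arg & 0xffffff, static_cast<uint16_t>((arg >> 24) + 1) };
}

// Example: arg = 0x03000010 decodes to start = 16, count = 4 (vertices 16..19).
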
From f87e396958b5262e3d783cc477cbb4cf47b31c21 Mon Sep 17 00:00:00 2001
From: vlj
Date: Mon, 12 Sep 2016 23:50:52 +0200
Subject: [PATCH 4/4] rsx: Dump DRAW_ARRAYS and DRAW_INDEX_ARRAYS.

---
 rpcs3/Emu/RSX/rsx_decode.h | 280 ++++++++++++++++------------------
 rpcs3/Emu/RSX/rsx_methods.cpp | 12 +-
 2 files changed, 134 insertions(+), 158 deletions(-)

diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h
index 5049e089da..fe323616fe 100644
--- a/rpcs3/Emu/RSX/rsx_decode.h
+++ b/rpcs3/Emu/RSX/rsx_decode.h
@@ -4309,6 +4309,78 @@ struct registers_decoder
 }
 };
+template <>
+struct registers_decoder<NV4097_DRAW_ARRAYS>
+{
+ struct decoded_type
+ {
+ private:
+ union {
+ u32 raw_value;
+ bitfield_decoder_t<0, 24> start;
+ bitfield_decoder_t<24, 8> count;
+ } m_data;
+
+ public:
+ decoded_type(u32 raw_value)
+ {
+ m_data.raw_value = raw_value;
+ }
+
+ u32 start() const
+ {
+ return m_data.start;
+ }
+
+ u16 count() const
+ {
+ return m_data.count + 1;
+ }
+ };
+
+ static std::string dump(decoded_type&& decoded_values)
+ {
+ return "Draw vertexes range [" + std::to_string(decoded_values.start()) + ", " +
+ std::to_string(decoded_values.start() + decoded_values.count()) + "]";
+ }
+};
+
+template <>
+struct registers_decoder<NV4097_DRAW_INDEX_ARRAY>
+{
+ struct decoded_type
+ {
+ private:
+ union {
+ u32 raw_value;
+ bitfield_decoder_t<0, 24> start;
+ bitfield_decoder_t<24, 8> count;
+ } m_data;
+
+ public:
+ decoded_type(u32 raw_value)
+ {
+ m_data.raw_value = raw_value;
+ }
+
+ u32 start() const
+ {
+ return m_data.start;
+ }
+
+ u16 count() const
+ {
+ return m_data.count + 1;
+ }
+ };
+
+ static std::string dump(decoded_type&& decoded_values)
+ {
+ return "Draw vertexes range [IdxArray[" + std::to_string(decoded_values.start()) +
+ "], IdxArray[" + std::to_string(decoded_values.start() + decoded_values.count()) + "}]";
+ }
+};
+
 #define TRANSFORM_PROGRAM(index) template<> struct registers_decoder : public transform_program_helper {};
 #define DECLARE_TRANSFORM_PROGRAM(index) NV4097_SET_TRANSFORM_PROGRAM + index,
 EXPAND_RANGE_512(0, TRANSFORM_PROGRAM)
@@ -4504,156 +4576,60 @@ EXPAND_RANGE_16(0, VERTEX_DATA4F)
 EXPAND_RANGE_16(0, VERTEX_DATA2S)
 EXPAND_RANGE_16(0, VERTEX_DATA4S)
-constexpr std::integer_sequence opcode_list{};
+constexpr std::integer_sequence
+ opcode_list{};
 } // end namespace rsx
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index ff4946116a..c3cd80946f 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -188,19 +188,19 @@ namespace rsx
 void draw_arrays(thread* rsx, u32 _reg, u32 arg)
 {
 rsx::method_registers.current_draw_clause.command = rsx::draw_command::array;
- u32 first = arg & 0xffffff;
- u32 count = (arg >> 24) + 1;
+ rsx::registers_decoder<NV4097_DRAW_ARRAYS>::decoded_type v(arg);
- rsx::method_registers.current_draw_clause.first_count_commands.emplace_back(std::make_pair(first, count));
+ rsx::method_registers.current_draw_clause.first_count_commands.emplace_back(
+ std::make_pair(v.start(), v.count()));
 }
 void draw_index_array(thread* rsx, u32 _reg, u32 arg)
 {
 rsx::method_registers.current_draw_clause.command = rsx::draw_command::indexed;
- u32 first = arg & 0xffffff;
- u32 count = (arg >> 24) + 1;
+ rsx::registers_decoder<NV4097_DRAW_INDEX_ARRAY>::decoded_type v(arg);
- rsx::method_registers.current_draw_clause.first_count_commands.emplace_back(std::make_pair(first, count));
+ rsx::method_registers.current_draw_clause.first_count_commands.emplace_back(
+ std::make_pair(v.start(), v.count()));
 }
 void draw_inline_array(thread* rsx, u32 _reg, u32 arg)