From 061824a7ec6f4348a8ffea5721b04e128a980c5e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 7 Sep 2017 22:32:52 +0300 Subject: [PATCH] rsx: Add support for batched multidraw gl: Fix multidraw [WIP] rsx: Ignore vertex base when data source is generated using arithmetic vk: Check pending flag before doing fence poke vk/gl: Fix for inlined array and immediate draws rsx: Collapse joined draws when batching --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 76 +++++++++++++- rpcs3/Emu/RSX/GL/GLProcTable.h | 1 + rpcs3/Emu/RSX/RSXThread.cpp | 174 +++++++++++++++++++++++++++++++- rpcs3/Emu/RSX/RSXThread.h | 8 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 45 +++++++-- rpcs3/Emu/RSX/rsx_methods.cpp | 2 +- rpcs3/Emu/RSX/rsx_methods.h | 1 + 7 files changed, 288 insertions(+), 19 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index fe8e83d6a4..b1179717a2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -30,6 +30,8 @@ GLGSRender::GLGSRender() : GSRender() m_vertex_cache.reset(new gl::null_vertex_cache()); else m_vertex_cache.reset(new gl::weak_vertex_cache()); + + supports_multidraw = true; } extern CellGcmContextData current_context; @@ -510,21 +512,85 @@ void GLGSRender::end() m_program->validate(); } + const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); + bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive; + if (indexed_draw_info) { const GLenum index_type = std::get<0>(indexed_draw_info.value()); const u32 index_offset = std::get<1>(indexed_draw_info.value()); - if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) + if (gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) { - __glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); + glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); } - __glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); + if (single_draw) + { + glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); + } + else + { + std::vector counts; + std::vector offsets; + + const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size(); + const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2; + uintptr_t index_ptr = index_offset; + + counts.reserve(draw_count); + offsets.reserve(draw_count); + + for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + { + const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); + counts.push_back(index_size); + offsets.push_back((const GLvoid*)index_ptr); + + index_ptr += (index_size << type_scale); + } + + for (int i = 0; i < draw_count; ++i) + { + if (counts[i] > 0) + glDrawElements(draw_mode, counts[i], index_type, offsets[i]); + } + + //glMultiDrawElements(draw_mode, counts.data(), index_type, offsets.data(), (GLsizei)draw_count); + } } else { - glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count); + if (single_draw) + { + glDrawArrays(draw_mode, 0, vertex_draw_count); + } + else + { + std::vector firsts; + std::vector counts; + const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size(); + + firsts.reserve(draw_count); + counts.reserve(draw_count); + + u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + { + firsts.push_back(range.first - base_index); + counts.push_back(range.second); + } + + ///* + // TEST FOR DRIVER BUGS - AMD: SHAME, SHAME, SHAME + for (int i = 0; i < draw_count; i++) + { + if (counts[i] > 0) + glDrawArrays(draw_mode, firsts[i], counts[i]); + }//*/ + + //glMultiDrawArrays(draw_mode, firsts.data(), counts.data(), (GLsizei)draw_count); + } } m_attrib_ring_buffer->notify(); @@ -570,7 +636,7 @@ void GLGSRender::set_viewport() //NOTE: window origin does not affect scissor region (probably only affects viewport matrix; already applied) //See LIMBO [NPUB-30373] which uses shader window origin = top - __glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h); + glScissor(scissor_x, scissor_y, scissor_w, scissor_h); glEnable(GL_SCISSOR_TEST); } diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index 44e14e7ba2..9866f07bac 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -168,6 +168,7 @@ OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange); OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); +OPENGL_PROC(PFNGLMULTIDRAWELEMENTSPROC, MultiDrawElements); OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index c2a2aa1134..e39489feda 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -277,6 +277,21 @@ namespace rsx { rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0); in_begin_end = true; + + switch (rsx::method_registers.current_draw_clause.primitive) + { + case rsx::primitive_type::line_loop: + case rsx::primitive_type::line_strip: + case rsx::primitive_type::polygon: + case rsx::primitive_type::quad_strip: + case rsx::primitive_type::triangle_fan: + case rsx::primitive_type::triangle_strip: + // Adjacency matters for these types + rsx::method_registers.current_draw_clause.is_disjoint_primitive = false; + break; + default: + rsx::method_registers.current_draw_clause.is_disjoint_primitive = true; + } } void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) @@ -376,6 +391,65 @@ namespace rsx // Raise priority above other threads thread_ctrl::set_native_priority(1); + // Deferred calls are used to batch draws together + u32 deferred_primitive_type = 0; + u32 deferred_call_size = 0; + bool has_deferred_call = false; + + auto flush_command_queue = [&]() + { + //TODO: Split first-count pairs if not consecutive + bool split_command = false; + std::vector > split_ranges; + auto first_count_cmds = method_registers.current_draw_clause.first_count_commands; + + if (method_registers.current_draw_clause.first_count_commands.size() > 1) + { + u32 next = method_registers.current_draw_clause.first_count_commands.front().first; + u32 last_head = 0; + + for (int n = 0; n < first_count_cmds.size(); ++n) + { + const auto &v = first_count_cmds[n]; + if (v.first != next) + { + split_command = true; + split_ranges.push_back(std::make_pair(last_head, n)); + last_head = n + 1; + } + + next = v.first + v.second; + } + + if (split_command) + split_ranges.push_back(std::make_pair(last_head, first_count_cmds.size() - 1)); + } + + if (!split_command) + { + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + } + else + { + std::vector> tmp; + auto list_head = first_count_cmds.begin(); + + for (auto &range : split_ranges) + { + tmp.resize(range.second - range.first + 1); + std::copy(list_head + range.first, list_head + range.second, tmp.begin()); + + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type); + method_registers.current_draw_clause.first_count_commands = tmp; + methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0); + } + } + + deferred_primitive_type = 0; + deferred_call_size = 0; + has_deferred_call = false; + }; + // TODO: exit condition while (!Emu.IsStopped()) { @@ -387,6 +461,9 @@ namespace rsx if (put == get || !Emu.IsRunning()) { + if (has_deferred_call) + flush_command_queue(); + do_internal_task(); continue; } @@ -472,7 +549,92 @@ namespace rsx u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? first_cmd : first_cmd + i; u32 value = args[i]; - //LOG_NOTICE(RSX, "%s(0x%x) = 0x%x", get_method_name(reg).c_str(), reg, value); + bool execute_method_call = true; + + if (supports_multidraw) + { + //TODO: Make this cleaner + bool flush_commands_flag = has_deferred_call; + + switch (reg) + { + case NV4097_SET_BEGIN_END: + { + // Hook; Allows begin to go through, but ignores end + if (value && value != deferred_primitive_type) + deferred_primitive_type = value; + else + { + deferred_call_size++; + + // Combine all calls since the last one + auto &first_count = method_registers.current_draw_clause.first_count_commands; + if (first_count.size() > deferred_call_size) + { + const auto &batch_first_count = first_count[deferred_call_size - 1]; + u32 count = batch_first_count.second; + u32 next = batch_first_count.first + count; + + for (int n = deferred_call_size; n < first_count.size(); n++) + { + if (first_count[n].first != next) + { + LOG_ERROR(RSX, "Non-continous first-count range passed as one draw; will be split."); + + first_count[deferred_call_size - 1].second = count; + deferred_call_size++; + + count = first_count[deferred_call_size - 1].second; + next = first_count[deferred_call_size - 1].first + count; + continue; + } + + count += first_count[n].second; + next += first_count[n].second; + } + + first_count[deferred_call_size - 1].second = count; + first_count.resize(deferred_call_size); + } + + has_deferred_call = true; + flush_commands_flag = false; + execute_method_call = false; + } + + break; + } + // These commands do not alter the pipeline state and deferred calls can still be active + // TODO: Add more commands here + case NV4097_INVALIDATE_VERTEX_FILE: + flush_commands_flag = false; + break; + case NV4097_DRAW_ARRAYS: + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) + break; + + flush_commands_flag = false; + break; + } + case NV4097_DRAW_INDEX_ARRAY: + { + const auto cmd = method_registers.current_draw_clause.command; + if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) + break; + + flush_commands_flag = false; + break; + } + } + + if (flush_commands_flag) + { + flush_command_queue(); + } + } + method_registers.decode(reg, value); if (capture_current_frame) @@ -480,9 +642,12 @@ namespace rsx frame_debug.command_queue.push_back(std::make_pair(reg, value)); } - if (auto method = methods[reg]) + if (execute_method_call) { - method(this, reg, value); + if (auto method = methods[reg]) + { + method(this, reg, value); + } } if (invalid_command_interrupt_raised) @@ -1534,7 +1699,7 @@ namespace rsx for (const auto &block : layout.interleaved_blocks) { u32 unique_verts; - u32 vertex_base = first_vertex * block.attribute_stride; + u32 vertex_base = 0; if (block.single_vertex) { @@ -1553,6 +1718,7 @@ namespace rsx else { unique_verts = vertex_count; + vertex_base = first_vertex * block.attribute_stride; } const u32 data_size = block.attribute_stride * unique_verts; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index a50c049528..960c1bd8db 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -144,6 +144,11 @@ namespace rsx std::array vertex_push_buffers; std::vector element_push_buffer; + s32 m_skip_frame_ctr = 0; + bool skip_frame = false; + + bool supports_multidraw = false; + public: RsxDmaControl* ctrl = nullptr; @@ -183,9 +188,6 @@ namespace rsx bool m_transform_constants_dirty; bool m_textures_dirty[16]; - protected: - s32 m_skip_frame_ctr = 0; - bool skip_frame = false; protected: std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 1bc1728f28..85cb19fb45 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -637,6 +637,8 @@ VKGSRender::VKGSRender() : GSRender() } m_current_frame = &frame_context_storage[0]; + + supports_multidraw = true; } VKGSRender::~VKGSRender() @@ -1166,10 +1168,23 @@ void VKGSRender::end() } std::optional > index_info = std::get<4>(upload_info); + bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive; + if (!index_info) { - const auto vertex_count = std::get<1>(upload_info); - vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); + if (single_draw) + { + const auto vertex_count = std::get<1>(upload_info); + vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); + } + else + { + const auto base_vertex = rsx::method_registers.current_draw_clause.first_count_commands.front().first; + for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + { + vkCmdDraw(*m_current_command_buffer, range.second, 1, range.first - base_vertex, 0); + } + } } else { @@ -1178,9 +1193,22 @@ void VKGSRender::end() VkDeviceSize offset; std::tie(offset, index_type) = index_info.value(); - vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); - vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); + + if (single_draw) + { + vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); + } + else + { + u32 first_vertex = 0; + for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands) + { + const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second); + vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, 0, first_vertex, 0); + first_vertex += verts; + } + } } vk::leave_uninterruptible(); @@ -1441,7 +1469,10 @@ void VKGSRender::flush_command_queue(bool hard_sync) //Clear all command buffer statuses for (auto &cb : m_primary_cb_list) - cb.poke(); + { + if (cb.pending) + cb.poke(); + } m_last_flushable_cb = -1; m_flush_commands = false; @@ -1623,7 +1654,9 @@ void VKGSRender::do_local_task() if (m_last_flushable_cb > -1) { auto cb = &m_primary_cb_list[m_last_flushable_cb]; - cb->poke(); + + if (cb->pending) + cb->poke(); if (!cb->pending) m_last_flushable_cb = -1; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 1f40babb81..55ceae6840 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -300,7 +300,7 @@ namespace rsx { if (arg) { - rsx::method_registers.current_draw_clause.first_count_commands.clear(); + rsx::method_registers.current_draw_clause.first_count_commands.resize(0); rsx::method_registers.current_draw_clause.command = draw_command::none; rsx::method_registers.current_draw_clause.primitive = rsx::method_registers.primitive_mode(); rsxthr->begin(); diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 4c54e5e4ff..4ca0a5c1ee 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -29,6 +29,7 @@ namespace rsx draw_command command; bool is_immediate_draw; + bool is_disjoint_primitive; std::vector inline_vertex_array;