rsx: Add support for batched multidraw

gl: Fix multidraw [WIP]
rsx: Ignore vertex base when data source is generated using arithmetic
vk: Check pending flag before doing fence poke
vk/gl: Fix for inlined array and immediate draws
rsx: Collapse joined draws when batching
This commit is contained in:
kd-11 2017-09-07 22:32:52 +03:00
parent abb56a354d
commit 061824a7ec
7 changed files with 288 additions and 19 deletions

View File

@ -30,6 +30,8 @@ GLGSRender::GLGSRender() : GSRender()
m_vertex_cache.reset(new gl::null_vertex_cache()); m_vertex_cache.reset(new gl::null_vertex_cache());
else else
m_vertex_cache.reset(new gl::weak_vertex_cache()); m_vertex_cache.reset(new gl::weak_vertex_cache());
supports_multidraw = true;
} }
extern CellGcmContextData current_context; extern CellGcmContextData current_context;
@ -510,21 +512,85 @@ void GLGSRender::end()
m_program->validate(); m_program->validate();
} }
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (indexed_draw_info) if (indexed_draw_info)
{ {
const GLenum index_type = std::get<0>(indexed_draw_info.value()); const GLenum index_type = std::get<0>(indexed_draw_info.value());
const u32 index_offset = std::get<1>(indexed_draw_info.value()); const u32 index_offset = std::get<1>(indexed_draw_info.value());
if (__glcheck gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) if (gl_state.enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{ {
__glcheck glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
} }
__glcheck glDrawElements(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset); if (single_draw)
{
glDrawElements(draw_mode, vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
}
else
{
std::vector<GLsizei> counts;
std::vector<const GLvoid*> offsets;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uintptr_t index_ptr = index_offset;
counts.reserve(draw_count);
offsets.reserve(draw_count);
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
counts.push_back(index_size);
offsets.push_back((const GLvoid*)index_ptr);
index_ptr += (index_size << type_scale);
}
for (int i = 0; i < draw_count; ++i)
{
if (counts[i] > 0)
glDrawElements(draw_mode, counts[i], index_type, offsets[i]);
}
//glMultiDrawElements(draw_mode, counts.data(), index_type, offsets.data(), (GLsizei)draw_count);
}
} }
else else
{ {
glDrawArrays(gl::draw_mode(rsx::method_registers.current_draw_clause.primitive), 0, vertex_draw_count); if (single_draw)
{
glDrawArrays(draw_mode, 0, vertex_draw_count);
}
else
{
std::vector<GLint> firsts;
std::vector<GLsizei> counts;
const auto draw_count = rsx::method_registers.current_draw_clause.first_count_commands.size();
firsts.reserve(draw_count);
counts.reserve(draw_count);
u32 base_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
firsts.push_back(range.first - base_index);
counts.push_back(range.second);
}
///*
// TEST FOR DRIVER BUGS - AMD: SHAME, SHAME, SHAME
for (int i = 0; i < draw_count; i++)
{
if (counts[i] > 0)
glDrawArrays(draw_mode, firsts[i], counts[i]);
}//*/
//glMultiDrawArrays(draw_mode, firsts.data(), counts.data(), (GLsizei)draw_count);
}
} }
m_attrib_ring_buffer->notify(); m_attrib_ring_buffer->notify();
@ -570,7 +636,7 @@ void GLGSRender::set_viewport()
//NOTE: window origin does not affect scissor region (probably only affects viewport matrix; already applied) //NOTE: window origin does not affect scissor region (probably only affects viewport matrix; already applied)
//See LIMBO [NPUB-30373] which uses shader window origin = top //See LIMBO [NPUB-30373] which uses shader window origin = top
__glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h); glScissor(scissor_x, scissor_y, scissor_w, scissor_h);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
} }

View File

@ -168,6 +168,7 @@ OPENGL_PROC(PFNGLMAPBUFFERRANGEPROC, MapBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange); OPENGL_PROC(PFNGLBINDBUFFERRANGEPROC, BindBufferRange);
OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase); OPENGL_PROC(PFNGLBINDBUFFERBASEPROC, BindBufferBase);
OPENGL_PROC(PFNGLMULTIDRAWELEMENTSPROC, MultiDrawElements);
OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);

View File

@ -277,6 +277,21 @@ namespace rsx
{ {
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0); rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
in_begin_end = true; in_begin_end = true;
switch (rsx::method_registers.current_draw_clause.primitive)
{
case rsx::primitive_type::line_loop:
case rsx::primitive_type::line_strip:
case rsx::primitive_type::polygon:
case rsx::primitive_type::quad_strip:
case rsx::primitive_type::triangle_fan:
case rsx::primitive_type::triangle_strip:
// Adjacency matters for these types
rsx::method_registers.current_draw_clause.is_disjoint_primitive = false;
break;
default:
rsx::method_registers.current_draw_clause.is_disjoint_primitive = true;
}
} }
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
@ -376,6 +391,65 @@ namespace rsx
// Raise priority above other threads // Raise priority above other threads
thread_ctrl::set_native_priority(1); thread_ctrl::set_native_priority(1);
// Deferred calls are used to batch draws together
u32 deferred_primitive_type = 0;
u32 deferred_call_size = 0;
bool has_deferred_call = false;
auto flush_command_queue = [&]()
{
//TODO: Split first-count pairs if not consecutive
bool split_command = false;
std::vector <std::pair<u32, u32>> split_ranges;
auto first_count_cmds = method_registers.current_draw_clause.first_count_commands;
if (method_registers.current_draw_clause.first_count_commands.size() > 1)
{
u32 next = method_registers.current_draw_clause.first_count_commands.front().first;
u32 last_head = 0;
for (int n = 0; n < first_count_cmds.size(); ++n)
{
const auto &v = first_count_cmds[n];
if (v.first != next)
{
split_command = true;
split_ranges.push_back(std::make_pair(last_head, n));
last_head = n + 1;
}
next = v.first + v.second;
}
if (split_command)
split_ranges.push_back(std::make_pair(last_head, first_count_cmds.size() - 1));
}
if (!split_command)
{
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
}
else
{
std::vector<std::pair<u32, u32>> tmp;
auto list_head = first_count_cmds.begin();
for (auto &range : split_ranges)
{
tmp.resize(range.second - range.first + 1);
std::copy(list_head + range.first, list_head + range.second, tmp.begin());
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, deferred_primitive_type);
method_registers.current_draw_clause.first_count_commands = tmp;
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
}
}
deferred_primitive_type = 0;
deferred_call_size = 0;
has_deferred_call = false;
};
// TODO: exit condition // TODO: exit condition
while (!Emu.IsStopped()) while (!Emu.IsStopped())
{ {
@ -387,6 +461,9 @@ namespace rsx
if (put == get || !Emu.IsRunning()) if (put == get || !Emu.IsRunning())
{ {
if (has_deferred_call)
flush_command_queue();
do_internal_task(); do_internal_task();
continue; continue;
} }
@ -472,7 +549,92 @@ namespace rsx
u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? first_cmd : first_cmd + i; u32 reg = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? first_cmd : first_cmd + i;
u32 value = args[i]; u32 value = args[i];
//LOG_NOTICE(RSX, "%s(0x%x) = 0x%x", get_method_name(reg).c_str(), reg, value); bool execute_method_call = true;
if (supports_multidraw)
{
//TODO: Make this cleaner
bool flush_commands_flag = has_deferred_call;
switch (reg)
{
case NV4097_SET_BEGIN_END:
{
// Hook; Allows begin to go through, but ignores end
if (value && value != deferred_primitive_type)
deferred_primitive_type = value;
else
{
deferred_call_size++;
// Combine all calls since the last one
auto &first_count = method_registers.current_draw_clause.first_count_commands;
if (first_count.size() > deferred_call_size)
{
const auto &batch_first_count = first_count[deferred_call_size - 1];
u32 count = batch_first_count.second;
u32 next = batch_first_count.first + count;
for (int n = deferred_call_size; n < first_count.size(); n++)
{
if (first_count[n].first != next)
{
LOG_ERROR(RSX, "Non-continous first-count range passed as one draw; will be split.");
first_count[deferred_call_size - 1].second = count;
deferred_call_size++;
count = first_count[deferred_call_size - 1].second;
next = first_count[deferred_call_size - 1].first + count;
continue;
}
count += first_count[n].second;
next += first_count[n].second;
}
first_count[deferred_call_size - 1].second = count;
first_count.resize(deferred_call_size);
}
has_deferred_call = true;
flush_commands_flag = false;
execute_method_call = false;
}
break;
}
// These commands do not alter the pipeline state and deferred calls can still be active
// TODO: Add more commands here
case NV4097_INVALIDATE_VERTEX_FILE:
flush_commands_flag = false;
break;
case NV4097_DRAW_ARRAYS:
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
break;
}
case NV4097_DRAW_INDEX_ARRAY:
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
break;
}
}
if (flush_commands_flag)
{
flush_command_queue();
}
}
method_registers.decode(reg, value); method_registers.decode(reg, value);
if (capture_current_frame) if (capture_current_frame)
@ -480,9 +642,12 @@ namespace rsx
frame_debug.command_queue.push_back(std::make_pair(reg, value)); frame_debug.command_queue.push_back(std::make_pair(reg, value));
} }
if (auto method = methods[reg]) if (execute_method_call)
{ {
method(this, reg, value); if (auto method = methods[reg])
{
method(this, reg, value);
}
} }
if (invalid_command_interrupt_raised) if (invalid_command_interrupt_raised)
@ -1534,7 +1699,7 @@ namespace rsx
for (const auto &block : layout.interleaved_blocks) for (const auto &block : layout.interleaved_blocks)
{ {
u32 unique_verts; u32 unique_verts;
u32 vertex_base = first_vertex * block.attribute_stride; u32 vertex_base = 0;
if (block.single_vertex) if (block.single_vertex)
{ {
@ -1553,6 +1718,7 @@ namespace rsx
else else
{ {
unique_verts = vertex_count; unique_verts = vertex_count;
vertex_base = first_vertex * block.attribute_stride;
} }
const u32 data_size = block.attribute_stride * unique_verts; const u32 data_size = block.attribute_stride * unique_verts;

View File

@ -144,6 +144,11 @@ namespace rsx
std::array<push_buffer_vertex_info, 16> vertex_push_buffers; std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
std::vector<u32> element_push_buffer; std::vector<u32> element_push_buffer;
s32 m_skip_frame_ctr = 0;
bool skip_frame = false;
bool supports_multidraw = false;
public: public:
RsxDmaControl* ctrl = nullptr; RsxDmaControl* ctrl = nullptr;
@ -183,9 +188,6 @@ namespace rsx
bool m_transform_constants_dirty; bool m_transform_constants_dirty;
bool m_textures_dirty[16]; bool m_textures_dirty[16];
protected:
s32 m_skip_frame_ctr = 0;
bool skip_frame = false;
protected: protected:
std::array<u32, 4> get_color_surface_addresses() const; std::array<u32, 4> get_color_surface_addresses() const;
u32 get_zeta_surface_address() const; u32 get_zeta_surface_address() const;

View File

@ -637,6 +637,8 @@ VKGSRender::VKGSRender() : GSRender()
} }
m_current_frame = &frame_context_storage[0]; m_current_frame = &frame_context_storage[0];
supports_multidraw = true;
} }
VKGSRender::~VKGSRender() VKGSRender::~VKGSRender()
@ -1166,10 +1168,23 @@ void VKGSRender::end()
} }
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info); std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<4>(upload_info);
bool single_draw = rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (!index_info) if (!index_info)
{ {
const auto vertex_count = std::get<1>(upload_info); if (single_draw)
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0); {
const auto vertex_count = std::get<1>(upload_info);
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
}
else
{
const auto base_vertex = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
vkCmdDraw(*m_current_command_buffer, range.second, 1, range.first - base_vertex, 0);
}
}
} }
else else
{ {
@ -1178,9 +1193,22 @@ void VKGSRender::end()
VkDeviceSize offset; VkDeviceSize offset;
std::tie(offset, index_type) = index_info.value(); std::tie(offset, index_type) = index_info.value();
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
if (single_draw)
{
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
}
else
{
u32 first_vertex = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.first_count_commands)
{
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.second);
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, 0, first_vertex, 0);
first_vertex += verts;
}
}
} }
vk::leave_uninterruptible(); vk::leave_uninterruptible();
@ -1441,7 +1469,10 @@ void VKGSRender::flush_command_queue(bool hard_sync)
//Clear all command buffer statuses //Clear all command buffer statuses
for (auto &cb : m_primary_cb_list) for (auto &cb : m_primary_cb_list)
cb.poke(); {
if (cb.pending)
cb.poke();
}
m_last_flushable_cb = -1; m_last_flushable_cb = -1;
m_flush_commands = false; m_flush_commands = false;
@ -1623,7 +1654,9 @@ void VKGSRender::do_local_task()
if (m_last_flushable_cb > -1) if (m_last_flushable_cb > -1)
{ {
auto cb = &m_primary_cb_list[m_last_flushable_cb]; auto cb = &m_primary_cb_list[m_last_flushable_cb];
cb->poke();
if (cb->pending)
cb->poke();
if (!cb->pending) if (!cb->pending)
m_last_flushable_cb = -1; m_last_flushable_cb = -1;

View File

@ -300,7 +300,7 @@ namespace rsx
{ {
if (arg) if (arg)
{ {
rsx::method_registers.current_draw_clause.first_count_commands.clear(); rsx::method_registers.current_draw_clause.first_count_commands.resize(0);
rsx::method_registers.current_draw_clause.command = draw_command::none; rsx::method_registers.current_draw_clause.command = draw_command::none;
rsx::method_registers.current_draw_clause.primitive = rsx::method_registers.primitive_mode(); rsx::method_registers.current_draw_clause.primitive = rsx::method_registers.primitive_mode();
rsxthr->begin(); rsxthr->begin();

View File

@ -29,6 +29,7 @@ namespace rsx
draw_command command; draw_command command;
bool is_immediate_draw; bool is_immediate_draw;
bool is_disjoint_primitive;
std::vector<u32> inline_vertex_array; std::vector<u32> inline_vertex_array;