mirror of https://github.com/RPCS3/rpcs3.git
gl: Workaround for poor AMD OpenGL performance
- Turns out the AMD driver really hates it if you render with a mapped index buffer. The driver internally seems to make a copy of the consumed indices and uses that copy, which is very slow. I was able to isolate this after observing that glDrawArrays performed acceptably, whereas the duration of glDrawElements scaled linearly with the number of vertices.
This commit is contained in:
parent
943752db30
commit
1b305bf789
|
@ -236,14 +236,14 @@ void GLGSRender::on_init_thread()
|
||||||
m_vertex_env_buffer = std::make_unique<gl::ring_buffer>();
|
m_vertex_env_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
m_texture_parameters_buffer = std::make_unique<gl::ring_buffer>();
|
m_texture_parameters_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
m_vertex_layout_buffer = std::make_unique<gl::ring_buffer>();
|
m_vertex_layout_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
m_index_ring_buffer = std::make_unique<gl::ring_buffer>();
|
m_index_ring_buffer = gl_caps.vendor_AMD ? std::make_unique<gl::transient_ring_buffer>() : std::make_unique<gl::ring_buffer>();
|
||||||
m_vertex_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
m_vertex_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
m_fragment_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
m_fragment_instructions_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
m_raster_env_ring_buffer = std::make_unique<gl::ring_buffer>();
|
m_raster_env_ring_buffer = std::make_unique<gl::ring_buffer>();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
|
m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
|
||||||
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
|
m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000);
|
||||||
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
|
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
|
||||||
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||||
m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
|
||||||
|
|
|
@ -812,6 +812,11 @@ namespace gl
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// Binds this buffer by forwarding to buffer::bind().
// Virtual so that derived buffers can do extra work before binding
// (transient_ring_buffer overrides it to flush its mapping first).
virtual void bind()
|
||||||
|
{
|
||||||
|
buffer::bind();
|
||||||
|
}
|
||||||
|
|
||||||
virtual void recreate(GLsizeiptr size, const void* data = nullptr)
|
virtual void recreate(GLsizeiptr size, const void* data = nullptr)
|
||||||
{
|
{
|
||||||
if (m_id)
|
if (m_id)
|
||||||
|
@ -890,6 +895,8 @@ namespace gl
|
||||||
|
|
||||||
virtual void unmap() {}
|
virtual void unmap() {}
|
||||||
|
|
||||||
|
// No-op by default. transient_ring_buffer overrides this to unmap its outstanding mapping.
virtual void flush() {}
|
||||||
|
|
||||||
//Notification of a draw command
|
//Notification of a draw command
|
||||||
virtual void notify()
|
virtual void notify()
|
||||||
{
|
{
|
||||||
|
@ -1011,6 +1018,68 @@ namespace gl
|
||||||
void notify() override {}
|
void notify() override {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A non-persistent ring buffer
|
||||||
|
// Internally maps and unmaps data. Uses persistent storage just like the regular persistent variant
// NOTE(review): recreate() below requests only GL_MAP_WRITE_BIT for the storage; "persistent storage"
// presumably refers to immutable (glBufferStorage-style) allocation rather than a persistent mapping - confirm
|
||||||
|
// Works around drivers that have issues using mapped data for specific sources (e.g. AMD proprietary driver with index buffers)
|
||||||
|
class transient_ring_buffer : public ring_buffer
|
||||||
|
{
|
||||||
|
// True while a range mapped by map_internal() has not yet been unmapped by flush()
bool dirty = false;
|
||||||
|

||||||
|
// Maps `length` bytes starting at `offset` for writing and returns the pointer produced by the map call.
// Any mapping still outstanding from an earlier call is released first.
void* map_internal(u32 offset, u32 length)
|
||||||
|
{
|
||||||
|
// Release the previous mapping (if any) before creating a new one
flush();
|
||||||
|

||||||
|
dirty = true;
|
||||||
|
// Write-only, unsynchronized mapping.
// NOTE(review): presumably safe because the ring allocator only hands out ranges the GPU is done with - confirm
return DSA_CALL2_RET(MapNamedBufferRange, m_id, offset, length, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
|
||||||
|
}
|
||||||
|

||||||
|
public:
|
||||||
|

||||||
|
// Unmaps any outstanding mapping, then binds the buffer, so it is never bound while still mapped
void bind() override
|
||||||
|
{
|
||||||
|
flush();
|
||||||
|
buffer::bind();
|
||||||
|
}
|
||||||
|

||||||
|
// Replaces the buffer object with new storage of `size` bytes, optionally initialized from `data`,
// then resets m_data_loc, m_size and m_memory_type.
void recreate(GLsizeiptr size, const void* data = nullptr) override
|
||||||
|
{
|
||||||
|
// A buffer object already exists: wait on m_fence before removing it
if (m_id)
|
||||||
|
{
|
||||||
|
m_fence.wait_for_signal();
|
||||||
|
remove();
|
||||||
|
}
|
||||||
|

||||||
|
buffer::create();
|
||||||
|
// NOTE(review): presumably restores the previous binding for this target when `save` goes out of scope - confirm
save_binding_state save(current_target(), *this);
|
||||||
|
// Storage is requested as mappable for writing only; no persistent or coherent flags are passed
DSA_CALL2(NamedBufferStorage, m_id, size, data, GL_MAP_WRITE_BIT);
|
||||||
|

||||||
|
m_data_loc = 0;
|
||||||
|
m_size = ::narrow<u32>(size);
|
||||||
|
m_memory_type = memory_type::host_visible;
|
||||||
|
}
|
||||||
|

||||||
|
// Reserves `alloc_size` bytes through ring_buffer::alloc_from_heap and maps exactly that range.
// Returns { pointer to the freshly mapped range, offset of the range within the buffer }.
std::pair<void*, u32> alloc_from_heap(u32 alloc_size, u16 alignment) override
|
||||||
|
{
|
||||||
|
// This variant maps per allocation; m_memory_mapping is required to be null
ensure(m_memory_mapping == nullptr);
|
||||||
|
// Only the offset (allocation.second) is used below; allocation.first is ignored
const auto allocation = ring_buffer::alloc_from_heap(alloc_size, alignment);
|
||||||
|
return { map_internal(allocation.second, alloc_size), allocation.second };
|
||||||
|
}
|
||||||
|

||||||
|
// Unmaps the range mapped by map_internal(), if one is outstanding; otherwise does nothing
void flush() override
|
||||||
|
{
|
||||||
|
if (dirty)
|
||||||
|
{
|
||||||
|
buffer::unmap();
|
||||||
|
dirty = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
// Same effect as flush()
void unmap() override
|
||||||
|
{
|
||||||
|
flush();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class buffer_view
|
class buffer_view
|
||||||
{
|
{
|
||||||
buffer* m_buffer = nullptr;
|
buffer* m_buffer = nullptr;
|
||||||
|
|
Loading…
Reference in New Issue