From 26c22f003f1c818ff0104b3a77e6a7367be8cbc0 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Thu, 7 Nov 2019 00:08:13 +1000
Subject: [PATCH] Common: Persistent-mapped stream buffer implementation

---
 src/common/gl/stream_buffer.cpp | 286 +++++++++++++++++++++++++++++---
 src/common/gl/stream_buffer.h | 11 +-
 src/core/gpu_hw_opengl.cpp | 6 +-
 3 files changed, 270 insertions(+), 33 deletions(-)

diff --git a/src/common/gl/stream_buffer.cpp b/src/common/gl/stream_buffer.cpp
index 93792f0e8..b8b17827b 100644
--- a/src/common/gl/stream_buffer.cpp
+++ b/src/common/gl/stream_buffer.cpp
@@ -1,9 +1,12 @@
 #include "stream_buffer.h"
+#include "YBaseLib/Assert.h"
+#include
+#include

 namespace GL {

 StreamBuffer::StreamBuffer(GLenum target, GLuint buffer_id, u32 size)
-  : m_target(target), m_buffer_id(buffer_id), m_size(size), m_cpu_buffer(size)
+  : m_target(target), m_buffer_id(buffer_id), m_size(size) // the base class no longer constructs a CPU staging vector
 {
 }

@@ -22,37 +25,274 @@ void StreamBuffer::Unbind()
   glBindBuffer(m_target, 0);
 }

-StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size)
-{
-  return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment};
-}
+namespace detail { // concrete implementations; StreamBuffer::Create() picks one of them

-void StreamBuffer::Unmap(u32 used_size)
+// Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage.
+class BufferSubDataStreamBuffer final : public StreamBuffer // stages writes in m_cpu_buffer; Unmap() uploads them
 {
-  if (used_size == 0)
-    return;
+public:
+  ~BufferSubDataStreamBuffer() override = default;

-  glBindBuffer(m_target, m_buffer_id);
-  glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data());
-}
+  MappingResult Map(u32 alignment, u32 min_size) override // min_size is not consulted by this implementation
+  {
+    return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment}; // start of the staging vector, offsets 0
+  }
+
+  void Unmap(u32 used_size) override
+  {
+    if (used_size == 0) // nothing was written, so skip the upload
+      return;
+
+    glBindBuffer(m_target, m_buffer_id);
+    glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data()); // copy the first used_size staged bytes to offset 0
+  }
+
+  static std::unique_ptr Create(GLenum target, u32 size) // returns an empty pointer when GL reports an error
+  {
+    glGetError(); // read (and thereby clear) a previously recorded error before the calls checked below
+
+    GLuint buffer_id;
+    glGenBuffers(1, &buffer_id);
+    glBindBuffer(target, buffer_id);
+    glBufferData(target, size, nullptr, GL_STREAM_DRAW); // allocate the data store without initial contents
+
+    GLenum err = glGetError();
+    if (err != GL_NO_ERROR)
+    {
+      glDeleteBuffers(1, &buffer_id); // do not leave the buffer name behind on failure
+      return {};
+    }
+
+    return std::unique_ptr(new BufferSubDataStreamBuffer(target, buffer_id, size));
+  }
+
+private:
+  BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) // private: instances are made by Create()
+    : StreamBuffer(target, buffer_id, size), m_cpu_buffer(size)
+  {
+  }
+
+  std::vector m_cpu_buffer; // CPU-side staging storage, constructed with `size` elements
+};
+
+// Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync.
+class BufferDataStreamBuffer final : public StreamBuffer // stages writes in m_cpu_buffer; Unmap() re-specifies the GL store
+{
+public:
+  ~BufferDataStreamBuffer() override = default;
+
+  MappingResult Map(u32 alignment, u32 min_size) override // min_size is not consulted by this implementation
+  {
+    return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment}; // start of the staging vector, offsets 0
+  }
+
+  void Unmap(u32 used_size) override
+  {
+    if (used_size == 0) // nothing was written, so skip the upload
+      return;
+
+    glBindBuffer(m_target, m_buffer_id);
+    glBufferData(m_target, used_size, m_cpu_buffer.data(), GL_STREAM_DRAW); // new store holding only the used_size staged bytes
+  }
+
+  static std::unique_ptr Create(GLenum target, u32 size) // returns an empty pointer when GL reports an error
+  {
+    glGetError(); // read (and thereby clear) a previously recorded error before the calls checked below
+
+    GLuint buffer_id;
+    glGenBuffers(1, &buffer_id);
+    glBindBuffer(target, buffer_id);
+    glBufferData(target, size, nullptr, GL_STREAM_DRAW); // allocate the data store without initial contents
+
+    GLenum err = glGetError();
+    if (err != GL_NO_ERROR)
+    {
+      glDeleteBuffers(1, &buffer_id); // do not leave the buffer name behind on failure
+      return {};
+    }
+
+    return std::unique_ptr(new BufferDataStreamBuffer(target, buffer_id, size));
+  }
+
+private:
+  BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) // private: instances are made by Create()
+    : StreamBuffer(target, buffer_id, size), m_cpu_buffer(size)
+  {
+  }
+
+  std::vector m_cpu_buffer; // CPU-side staging storage, constructed with `size` elements
+};
+
+// Base class for implementations which require syncing.
+class SyncingStreamBuffer : public StreamBuffer // guards buffer reuse with one GL fence per block of m_bytes_per_block bytes
+{
+public:
+  enum : u32
+  {
+    NUM_SYNC_POINTS = 16 // number of fence slots, i.e. blocks the buffer is divided into
+  };
+
+  virtual ~SyncingStreamBuffer() override
+  {
+    for (GLsync& sync : m_sync_objects) // every slot: fences sit below m_used_block_index and possibly from m_available_block_index up
+    {
+      if (sync) // slots that were waited on (or never fenced) are null
+        glDeleteSync(sync);
+    }
+  }
+
+protected:
+  SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size)
+    : StreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) // size / NUM_SYNC_POINTS, rounded up
+  {
+  }
+
+  u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } // index of the block containing byte `offset`
+
+  void AddSyncsForOffset(u32 offset) // fence each not-yet-fenced block that lies wholly before `offset`
+  {
+    const u32 end = GetSyncIndexForOffset(offset);
+    for (; m_used_block_index < end; m_used_block_index++)
+    {
+      DebugAssert(!m_sync_objects[m_used_block_index]);
+      m_sync_objects[m_used_block_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    }
+  }
+
+  void WaitForSync(GLsync& sync) // wait without timeout, then delete the fence and null the slot
+  {
+    glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
+    glDeleteSync(sync);
+    sync = nullptr;
+  }
+
+  void EnsureSyncsWaitedForOffset(u32 offset) // wait on outstanding fences up to and including the block holding `offset`
+  {
+    const u32 end = std::min(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS); // NOTE(review): template argument looks stripped in this copy of the patch
+    for (; m_available_block_index < end; m_available_block_index++)
+    {
+      DebugAssert(m_sync_objects[m_available_block_index]);
+      WaitForSync(m_sync_objects[m_available_block_index]);
+    }
+  }
+
+  void AllocateSpace(u32 size) // make [m_position, m_position + size) writable, rewinding to offset 0 if it does not fit
+  {
+    // add sync objects for writes since the last allocation
+    AddSyncsForOffset(m_position);
+
+    // wait for sync objects for the space we want to use
+    EnsureSyncsWaitedForOffset(m_position + size);
+
+    // wrap-around?
+    if ((m_position + size) > m_size)
+    {
+      // current position ... buffer end
+      AddSyncsForOffset(m_size);
+
+      // rewind, and try again
+      m_position = 0;
+
+      // wait for the sync at the start of the buffer
+      WaitForSync(m_sync_objects[0]);
+      m_available_block_index = 1;
+
+      // and however much more we need to satisfy the allocation
+      EnsureSyncsWaitedForOffset(size);
+      m_used_block_index = 0;
+    }
+  }
+
+  u32 m_position = 0; // byte offset at which the next allocation starts
+  u32 m_used_block_index = 0; // blocks below this index have had a fence created since the last rewind
+  u32 m_available_block_index = NUM_SYNC_POINTS; // blocks below this index need no further waiting before being written
+  u32 m_bytes_per_block;
+  std::array m_sync_objects{}; // fence slots, value-initialised to null; NOTE(review): template arguments look stripped in this copy
+};
+
+class BufferStorageStreamBuffer : public SyncingStreamBuffer // writes go directly into a persistently mapped GL buffer
+{
+public:
+  ~BufferStorageStreamBuffer() override
+  {
+    glBindBuffer(m_target, m_buffer_id);
+    glUnmapBuffer(m_target); // release the mapping made in Create()
+  }
+
+  MappingResult Map(u32 alignment, u32 min_size) override
+  {
+    if (m_position > 0) // offset 0 is passed through as-is
+      m_position = Common::AlignUp(m_position, alignment);
+
+    AllocateSpace(min_size); // may wait on fences and may rewind m_position to 0
+    DebugAssert((m_position + min_size) <= (m_available_block_index * m_bytes_per_block));
+
+    const u32 free_space_in_block = ((m_available_block_index * m_bytes_per_block) - m_position); // bytes from m_position to the end of the waited-for blocks
+    return MappingResult{static_cast(m_mapped_ptr + m_position), m_position, m_position / alignment,
+                         free_space_in_block / alignment};
+  }
+
+  void Unmap(u32 used_size) override
+  {
+    DebugAssert((m_position + used_size) <= m_size);
+    m_position += used_size; // no upload call here; only the write offset advances
+  }
+
+  static std::unique_ptr Create(GLenum target, u32 size) // returns an empty pointer when buffer creation reports a GL error
+  {
+    glGetError(); // read (and thereby clear) a previously recorded error before the calls checked below
+
+    GLuint buffer_id;
+    glGenBuffers(1, &buffer_id);
+    glBindBuffer(target, buffer_id);
+    if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage)
+      glBufferStorage(target, size, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
+    else if (GLAD_GL_EXT_buffer_storage)
+      glBufferStorageEXT(target, size, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
+
+    GLenum err = glGetError();
+    if (err != GL_NO_ERROR)
+    {
+      glDeleteBuffers(1, &buffer_id); // do not leave the buffer name behind on failure
+      return {};
+    }
+
+    u8* mapped_ptr = static_cast(
+      glMapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | 
GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT));
+    Assert(mapped_ptr);
+
+    return std::unique_ptr(new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr));
+  }
+
+private:
+  BufferStorageStreamBuffer(GLenum target, GLuint buffer_id, u32 size, u8* mapped_ptr) // private: instances are made by Create()
+    : SyncingStreamBuffer(target, buffer_id, size), m_mapped_ptr(mapped_ptr)
+  {
+  }
+
+  u8* m_mapped_ptr; // pointer returned by glMapBufferRange() for the whole buffer
+};
+
+} // namespace detail

 std::unique_ptr StreamBuffer::Create(GLenum target, u32 size)
 {
-  glGetError();
-
-  GLuint buffer_id;
-  glGenBuffers(1, &buffer_id);
-  glBindBuffer(target, buffer_id);
-  glBufferData(target, size, nullptr, GL_STREAM_DRAW);
-
-  GLenum err = glGetError();
-  if (err != GL_NO_ERROR)
+  std::unique_ptr buf;
+  if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) // first choice: persistently mapped storage
   {
-    glDeleteBuffers(1, &buffer_id);
-    return {};
+    buf = detail::BufferStorageStreamBuffer::Create(target, size);
+    if (buf) // an empty result falls through to the non-mapped implementations below
+      return buf;
   }

-  return std::unique_ptr(new StreamBuffer(target, buffer_id, size));
+  const char* vendor = reinterpret_cast(glGetString(GL_VENDOR));
+  if (vendor && (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0)) // glGetString() can return null; strcmp on null is undefined
+  {
+    // Mali and Adreno drivers can't do sub-buffer tracking...
+    return detail::BufferDataStreamBuffer::Create(target, size);
+  }
+
+  return detail::BufferSubDataStreamBuffer::Create(target, size); // default for every other vendor string (or none)
 }

 } // namespace GL
\ No newline at end of file
diff --git a/src/common/gl/stream_buffer.h b/src/common/gl/stream_buffer.h
index 6869ec9d4..78958dcf2 100644
--- a/src/common/gl/stream_buffer.h
+++ b/src/common/gl/stream_buffer.h
@@ -6,11 +6,10 @@
 #include

 namespace GL {
-// TODO: Persistent mapping-based implementation
 class StreamBuffer
 {
 public:
-  ~StreamBuffer();
+  virtual ~StreamBuffer(); // virtual: the class now has overriding subclasses in stream_buffer.cpp

   ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; }
   ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; }
@@ -27,18 +26,16 @@ public:
     u32 space_aligned; // remaining space / alignment
   };

-  MappingResult Map(u32 alignment, u32 min_size);
-  void Unmap(u32 used_size);
+  virtual MappingResult Map(u32 alignment, u32 min_size) = 0; // implemented by the classes in namespace detail
+  virtual void Unmap(u32 used_size) = 0; // used_size: bytes written since the matching Map()

   static std::unique_ptr Create(GLenum target, u32 size);

-private:
+protected: // widened from private so subclasses can call the constructor and read the fields
   StreamBuffer(GLenum target, GLuint buffer_id, u32 size);
   GLenum m_target;
   GLuint m_buffer_id;
   u32 m_size;
-
-  std::vector m_cpu_buffer;
 };

 } // namespace GL
\ No newline at end of file
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 567e1d896..eb3d0da5c 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -756,14 +756,14 @@ void GPU_HW_OpenGL::FlushRender()
   if (m_batch.NeedsTwoPassRendering())
   {
     SetDrawState(BatchRenderMode::OnlyTransparent);
-    glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count);
+    glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count); // first vertex is now m_batch_base_vertex instead of 0
     SetDrawState(BatchRenderMode::OnlyOpaque);
-    glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count);
+    glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count);
   }
   else
   {
     SetDrawState(m_batch.GetRenderMode());
-    
glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count); + glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count); } }