gsdx ogl: add buffer_storage for vertex upload

A bit slower, maybe because SubData does the copy in the driver thread, whereas my memcpy is done on the main thread. I'm not sure it would be worth an extra thread to copy vertex data to the GPU. Note: testers are welcome. You need to edit the ini file: "ogl_vertex_storage=1" <= enable the extension; "ogl_vertex_storage=0" <= disable the extension. Again, you need the support of GL_ARB_buffer_storage (i.e. not Catalyst).
2014-03-24 10:49:45 +01:00 · 2014-03-24 10:49:45 +01:00 · c85ba4c259
parent 403518e852
commit c85ba4c259
1 changed files with 82 additions and 36 deletions
--- a/plugins/GSdx/GSVertexArrayOGL.h
+++ b/plugins/GSdx/GSVertexArrayOGL.h
@ -36,8 +36,10 @@ class GSBufferOGL {
 	size_t m_count;
 	size_t m_limit;
 	const  GLenum m_target;
-	GLuint m_buffer;
+	GLuint m_buffer_name;
 	const bool m_sub_data_config;
+	uint8*  m_buffer_ptr;
+	const bool m_buffer_storage;

 	public:
 	GSBufferOGL(GLenum target, size_t stride) :
@ -47,28 +49,48 @@ class GSBufferOGL {
 		, m_limit(0)
 		, m_target(target)
 		, m_sub_data_config(theApp.GetConfig("ogl_vertex_subdata", 1) != 0)
+		, m_buffer_storage((theApp.GetConfig("ogl_vertex_storage", 0) == 1) && GLLoader::found_GL_ARB_buffer_storage)
 	{
-		gl_GenBuffers(1, &m_buffer);
+		gl_GenBuffers(1, &m_buffer_name);
 		// Opengl works best with 1-4MB buffer.
 		// Warning m_limit is the number of object (not the size in Bytes)
-		m_limit = 2 * 1024 * 1024 / m_stride;
-		//m_limit = 512 * 1024 * m_stride;
+		m_limit = 2 * 2 * 1024 * 1024 / m_stride;
+
+		if (m_buffer_storage) {
+			bind();
+			// FIXME do I need the dynamic
+			const GLbitfield map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+			const GLbitfield create_flags = map_flags | GL_DYNAMIC_STORAGE_BIT;
+
+			gl_BufferStorage(m_target, m_stride*m_limit, NULL, create_flags );
+			m_buffer_ptr = (uint8*) gl_MapBufferRange(m_target, 0, m_stride*m_limit, map_flags);
+		} else {
+			m_buffer_ptr = NULL;
+		}
 	}

-	~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
+	~GSBufferOGL() {
+		if (m_buffer_storage) {
+			bind();
+			gl_UnmapBuffer(m_target);
+		}
+		gl_DeleteBuffers(1, &m_buffer_name);
+	}

 	void allocate() { allocate(m_limit); }

 	void allocate(size_t new_limit)
 	{
+		if (!m_buffer_storage) {
 			m_start = 0;
 			m_limit = new_limit;
 			gl_BufferData(m_target,  m_limit * m_stride, NULL, GL_STREAM_DRAW);
 		}
+	}

 	void bind()
 	{
-		gl_BindBuffer(m_target, m_buffer);
+		gl_BindBuffer(m_target, m_buffer_name);
 	}

 	void subdata_upload(const void* src, uint32 count)
@ -96,9 +118,15 @@ class GSBufferOGL {
 	{
 		void* dst;
 		if (Map(&dst, count)) {
+#if 0
 			// FIXME which one to use. Note dst doesn't have any aligment guarantee
 			// because it depends of the offset
-			//GSVector4i::storent(dst, src, m_count * m_stride);
+			if (m_target == GL_ARRAY_BUFFER) {
+				GSVector4i::storent(dst, src, m_count * m_stride);
+			} else {
+				memcpy(dst, src, m_stride*m_count);
+			}
+#endif
 			memcpy(dst, src, m_stride*m_count);
 			Unmap();
 		}
@ -119,7 +147,7 @@ class GSBufferOGL {
 			}
 		}
 #endif
-		if (m_sub_data_config) {
+		if (m_sub_data_config && !m_buffer_storage) {
 			subdata_upload(src, count);
 		} else {
 			map_upload(src, count);
@ -129,6 +157,21 @@ class GSBufferOGL {
 	bool Map(void** pointer, uint32 count ) {
 		m_count = count;

+		if (m_buffer_storage) {
+			// It would need some protection of the data. For the moment finger cross!
+
+			if (m_count > m_limit) {
+				fprintf(stderr, "Buffer (%x) too small! Please report it upstream\n", m_target);
+				ASSERT(0);
+			} else if (m_count > (m_limit - m_start) ) {
+				//fprintf(stderr, "Wrap buffer (%x)\n", m_target);
+				// Wrap at startup
+				m_start = 0;
+			}
+
+			*pointer = m_buffer_ptr + m_start*m_stride;
+
+		} else {
 			// Note: For an explanation of the map flag
 			// see http://www.opengl.org/wiki/Buffer_Object_Streaming
 			uint32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
@ -152,11 +195,14 @@ class GSBufferOGL {

 			// Upload the data to the buffer
 			*pointer = (uint8*) gl_MapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags);
+		}

 		return true;
 	}

-	void Unmap() { gl_UnmapBuffer(m_target); }
+	void Unmap() {
+		if (!m_buffer_storage) gl_UnmapBuffer(m_target);
+	}

 	void EndScene()
 	{
@ -212,7 +258,7 @@ public:
 		m_ib = new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32));

 		bind();
-		// Note: index array are part of the VA state so it need to be bind only once.
+		// Note: index array are part of the VA state so it need to be bound only once.
 		m_ib->bind();

 		m_vb->allocate();