gsdx-ogl: LINUX-ONLY

* rewrite the vertex management with a nice GSVertexBufferState object * extend GSUniformBufferOGL to use a better object interface * properly delete texture * manage buffer with glMap* instead of glBufferSubData git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@4990 96395faa-99c1-11dd-bbfe-3dabce05a288
2011-12-15 18:27:58 +00:00 · 2011-12-15 18:27:58 +00:00 · ab01926ed5
parent 4029cc5195
commit ab01926ed5
3 changed files with 176 additions and 196 deletions
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -56,10 +56,9 @@
 GSDeviceOGL::GSDeviceOGL()
 	: m_free_window(false)
 	  , m_window(NULL)
-	  , m_vb(0)
 	  , m_pipeline(0)
 	  , m_fbo(0)
-	  , m_sr_vb_offset(0)
+	  , m_vb_sr(NULL)
 	  , m_srv_changed(false)
 	  , m_ss_changed(false)
 {
@ -74,6 +73,9 @@ GSDeviceOGL::GSDeviceOGL()

 GSDeviceOGL::~GSDeviceOGL()
 {
+	// Clean vertex buffer state
+	delete (m_vb_sr);
+
 	// Clean m_merge
 	for (uint i = 0; i < 2; i++)
 		glDeleteProgram(m_merge.ps[i]);
@ -86,8 +88,6 @@ GSDeviceOGL::~GSDeviceOGL()
 	delete (m_interlace.cb);

 	// Clean m_convert
-	glDeleteVertexArrays(1, &m_convert.va);
-	glDeleteBuffers(1, &m_convert.vb);
 	glDeleteProgram(m_convert.vs);
 	for (uint i = 0; i < 2; i++)
 		glDeleteProgram(m_convert.ps[i]);
@ -101,7 +101,6 @@ GSDeviceOGL::~GSDeviceOGL()
 	delete m_date.bs;

 	// Clean various opengl allocation
-	glDeleteBuffers(1, &m_vb);
 	glDeleteProgramPipelines(1, &m_pipeline);
 	glDeleteFramebuffers(1, &m_fbo);
 }
@ -180,31 +179,18 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
 	OMSetFBO(0);

 	// ****************************************************************
-	// convert
+	// Vertex buffer state
 	// ****************************************************************
-	
-	glGenVertexArrays(1, &m_convert.va);
-	IASetVertexArrray(m_convert.va);
-
-	glGenBuffers(1, &m_convert.vb);
-	IASetVertexBufferBind(m_convert.vb);
-	glBufferData(GL_ARRAY_BUFFER, 4 * 4 * sizeof(GSVertexPT1), NULL, GL_STREAM_DRAW);
-
 	GSInputLayout il_convert[2] =
 	{
 		{0, 4, GL_FLOAT, sizeof(GSVertexPT1), (const GLvoid*)offsetof(struct GSVertexPT1, p) },
 		{1, 2, GL_FLOAT, sizeof(GSVertexPT1), (const GLvoid*)offsetof(struct GSVertexPT1, t) },
 	};
+	m_vb_sr = new GSVertexBufferState(sizeof(GSVertexPT1), il_convert, countof(il_convert));

-	for (int i = 0; i < 2; i++) {
-		// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
-		glEnableVertexAttribArray(il_convert[i].index);
-		glVertexAttribPointer(il_convert[i].index, il_convert[i].size, il_convert[i].type, GL_FALSE,  il_convert[i].stride, il_convert[i].offset);
-	}
-	// Unbind to avoid issue with the setup of others parameters
-	IASetVertexArrray(0);
-	IASetVertexBufferBind(0);
-
+	// ****************************************************************
+	// convert
+	// ****************************************************************
 	CompileShaderFromSource("convert.glsl", "vs_main", GL_VERTEX_SHADER, &m_convert.vs);
 	for(int i = 0; i < countof(m_convert.ps); i++)
 		CompileShaderFromSource("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_convert.ps[i]);
@ -264,10 +250,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
 	// merge
 	// ****************************************************************
 	m_merge.cb = new GSUniformBufferOGL(1, sizeof(MergeConstantBuffer));
-	glGenBuffers(1, &m_merge.cb->buffer);
-	glBindBuffer(GL_UNIFORM_BUFFER, m_merge.cb->buffer);
-	glBufferData(GL_UNIFORM_BUFFER, m_merge.cb->byte_size, NULL, GL_DYNAMIC_DRAW);
-	glBindBufferBase(GL_UNIFORM_BUFFER, m_merge.cb->index, m_merge.cb->buffer);

 	for(int i = 0; i < countof(m_merge.ps); i++)
 		CompileShaderFromSource("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_merge.ps[i]);
@ -285,10 +267,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
 	// interlace
 	// ****************************************************************
 	m_interlace.cb = new GSUniformBufferOGL(2, sizeof(InterlaceConstantBuffer));
-	glGenBuffers(1, &m_interlace.cb->buffer);
-	glBindBuffer(GL_UNIFORM_BUFFER, m_interlace.cb->buffer);
-	glBufferData(GL_UNIFORM_BUFFER, m_interlace.cb->byte_size, NULL, GL_DYNAMIC_DRAW);
-	glBindBufferBase(GL_UNIFORM_BUFFER, m_interlace.cb->index, m_interlace.cb->buffer);

 	for(int i = 0; i < countof(m_interlace.ps); i++)
 		CompileShaderFromSource("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, &m_interlace.ps[i]);
@ -473,27 +451,6 @@ bool GSDeviceOGL::Reset(int w, int h)
 	// in the backbuffer
 	m_backbuffer = new GSTextureOGL(0, w, h, false, 0);

-#if 0
-	if(m_swapchain)
-	{
-		DXGI_SWAP_CHAIN_DESC scd;
-
-		memset(&scd, 0, sizeof(scd));
-
-		m_swapchain->GetDesc(&scd);
-		m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
-
-		CComPtr<ID3D11Texture2D> backbuffer;
-
-		if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
-		{
-			return false;
-		}
-
-		m_backbuffer = new GSTexture11(backbuffer);
-	}
-#endif
-
 	return true;
 }

@ -506,7 +463,7 @@ void GSDeviceOGL::Flip()

 void GSDeviceOGL::DrawPrimitive()
 {
-	glDrawArrays(m_state.topology, m_vertices.start, m_vertices.count);
+	glDrawArrays(m_state.topology, m_state.vb_state->start, m_state.vb_state->count);
 }

 void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -716,14 +673,9 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
 		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(flip_sr.z, flip_sr.w)},
 	};

-	IASetVertexArrray(m_convert.va);
-	IASetVertexBufferBind(m_convert.vb);
-	// FIXME it will worth some benchmark.
-	// What is the faster always use the same. Or pack to difference emplacement. I'm afraid
-	// that in all case the GPU will be stall to wait the data
-	// Note maybe create a new buffer can be faster.
-	// m_sr_vb_offset = 0;
-	glBufferSubData(GL_ARRAY_BUFFER, m_sr_vb_offset * 4 * sizeof(GSVertexPT1) , sizeof(GSVertexPT1) * 4, vertices);
+	IASetVertexState(m_vb_sr);
+	IASetVertexBuffer(vertices, 4);
+	IASetPrimitiveTopology(GL_TRIANGLE_STRIP);

 	// ************************************
 	// vs
@ -748,8 +700,7 @@ void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
 	// ************************************
 	// Draw
 	// ************************************
-	glDrawArrays(GL_TRIANGLE_STRIP, m_sr_vb_offset * 4, 4);
-	m_sr_vb_offset = (m_sr_vb_offset + 1) & 0x3;
+	DrawPrimitive();

 	// ************************************
 	// End
@ -771,11 +722,11 @@ void GSDeviceOGL::DoMerge(GSTexture* st[2], GSVector4* sr, GSTexture* dt, GSVect

 	if(st[0])
 	{
-		if (m_state.cb != m_merge.cb->buffer) {
-			m_state.cb = m_merge.cb->buffer;
-			glBindBuffer(GL_UNIFORM_BUFFER, m_merge.cb->buffer);
+		if (m_state.cb != m_merge.cb) {
+			m_state.cb = m_merge.cb;
+			m_state.cb->bind();
 		}
-		glBufferSubData(GL_UNIFORM_BUFFER, 0, m_merge.cb->byte_size, &c.v);
+		m_state.cb->upload(&c.v);

 		StretchRect(st[0], sr[0], dt, dr[0], m_merge.ps[mmod ? 1 : 0], m_merge.bs);
 	}
@ -793,11 +744,11 @@ void GSDeviceOGL::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool lin
 	cb.ZrH = GSVector2(0, 1.0f / s.y);
 	cb.hH = s.y / 2;

-	if (m_state.cb != m_interlace.cb->buffer) {
-		m_state.cb = m_interlace.cb->buffer;
-		glBindBuffer(GL_UNIFORM_BUFFER, m_interlace.cb->buffer);
+	if (m_state.cb != m_interlace.cb) {
+		 m_state.cb = m_interlace.cb;
+		 m_state.cb->bind();
 	}
-	glBufferSubData(GL_UNIFORM_BUFFER, 0, m_interlace.cb->byte_size, &cb);
+	m_state.cb->upload(&cb);

 	StretchRect(st, sr, dt, dr, m_interlace.ps[shader], linear);
 }
@ -823,88 +774,52 @@ GSTexture* GSDeviceOGL::Resolve(GSTexture* t)
 	return NULL;
 }

-void GSDeviceOGL::IASetVertexArrray(GLuint va)
+void GSDeviceOGL::EndScene()
 {
-	if (m_state.va != va) {
-		glBindVertexArray(va);
-		m_state.va = va;
+	m_state.vb_state->start += m_state.vb_state->count;
+	m_state.vb_state->count = 0;
+}
+
+void GSDeviceOGL::IASetVertexState(GSVertexBufferState* vb_state)
+{
+	if (m_state.vb_state != vb_state) {
+		m_state.vb_state = vb_state;
+		vb_state->bind();
 	}
 }

-void GSDeviceOGL::IASetVertexBufferBind(GLuint vb)
+void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
 {
-	if (m_state.vb != vb) {
-		glBindBuffer(GL_ARRAY_BUFFER, vb);
-		m_state.vb = vb;
+	// Streams count vertices into the current vertex buffer state (m_state.vb_state).
+	// For an explanation of the map flags see http://www.opengl.org/wiki/Buffer_Object_Streaming
+	uint32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
+
+	GSVertexBufferState* vb = m_state.vb_state;
+	vb->count = count;
+
+	// The GPU buffer cannot hold count vertices at all: reallocate a bigger one
+	if (count > vb->limit) {
+		vb->allocate(std::max<int>(count * 3 / 2, 60000));
+
+	} else if (count > (vb->limit - vb->start) ) {
+		// Not enough free room left after start: wrap around to the beginning
+		vb->start = 0;
+
+		// Tell the driver that it can orphan the previous buffer and restart from a scratch buffer.
+		// Technically the old buffer is no longer accessible by the application, but the
+		// GL will only effectively remove it once the pending draw calls are finished.
+		map_flags |= GL_MAP_INVALIDATE_BUFFER_BIT;
+	} else {
+		// Tell the driver that the mapped range doesn't need to contain any valid data, and that we promise to write the entire range we map
+		map_flags |= GL_MAP_INVALIDATE_RANGE_BIT;
 	}
+
+	vb->upload(vertices, map_flags);
 }

-void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
-{
-	ASSERT(m_vertices.count == 0);
-
-	if(count * stride > m_vertices.limit * m_vertices.stride)
-	{
-		// Current GPU buffer is too small need to realocate a new one
-		if (m_vb) {
-			glDeleteBuffers(1, &m_vb);
-			m_vb = 0;
-		}
-
-		m_vertices.start = 0;
-		m_vertices.count = 0;
-		m_vertices.limit = std::max<int>(count * 3 / 2, 11000);
-		m_vertices.stride = stride;
-	}
-
-	if(!m_vb)
-	{
-		glGenBuffers(1, &m_vb);
-		IASetVertexBufferBind(m_vb);
-		// Allocate the buffer
-		glBufferData(GL_ARRAY_BUFFER, m_vertices.limit * m_vertices.stride, NULL, GL_STREAM_DRAW);
-		//m_vb_changed = true;
-	}
-
-	// append data or go back to the beginning
-	// Hum why we don't always go back to the beginning !!!
-	if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
-		m_vertices.start = 0;
-
-	// Fill the buffer
-	glBufferSubData(GL_ARRAY_BUFFER, m_vertices.start * stride, count * stride, vertices);
-
-	m_vertices.count = count;
-}
-
-#if 0
-void GSDeviceOGL::IASetInputLayout(GSInputLayout* layout, int layout_nbr)
-{
-	if(m_state.layout != layout || m_state.layout_nbr != layout_nbr || m_vb_changed)
-	{
-		// Remove old configuration.
-		for (int i = m_state.layout_nbr ; i > (m_state.layout_nbr - layout_nbr) ; i--) {
-			glDisableVertexAttribArray(i);
-		}
-
-		for (int i = 0; i < layout_nbr; i++) {
-			glEnableVertexAttribArray(layout[i].index);
-			glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, GL_FALSE,  layout[i].stride, layout[i].offset);
-		}
-
-		m_vb_changed = false;
-		m_state.layout = layout;
-		m_state.layout_nbr = layout_nbr;
-	}
-}
-#endif
-
 void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology)
 {
-	if(m_state.topology != topology)
-	{
-		m_state.topology = topology;
-	}
+	m_state.topology = topology;
 }

 void GSDeviceOGL::VSSetShader(GLuint vs)
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@ -74,15 +74,43 @@ struct GSDepthStencilOGL {
 	{}
 };

-struct GSUniformBufferOGL {
+class GSUniformBufferOGL {
 	GLuint buffer;		// data object
 	GLuint index;		// GLSL slot
-	uint   byte_size;	// size of the data
+	uint   size;	    // size of the data

-	GSUniformBufferOGL(GLuint index, uint byte_size) : buffer(0)
-														, index(index)
-														, byte_size(byte_size)
-	{}
+public:
+	GSUniformBufferOGL(GLuint index, uint size) : index(index)
+														, size(size)
+	{
+		glGenBuffers(1, &buffer);
+		bind();
+		allocate();
+		attach();
+	}
+
+	void bind()
+	{
+		glBindBuffer(GL_UNIFORM_BUFFER, buffer);
+	}
+
+	void allocate()
+	{
+		glBufferData(GL_UNIFORM_BUFFER, size, NULL, GL_STREAM_DRAW);
+	}
+
+	void attach()
+	{
+		glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer);
+	}
+
+	void upload(const void* src) // Overwrites the bound GL_UNIFORM_BUFFER with size bytes read from src
+	{
+		uint8* dst = (uint8*) glMapBufferRange(GL_UNIFORM_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
+		if (dst == NULL) return; // mapping failed: nothing to copy into and nothing to unmap
+		memcpy(dst, src, size);
+		glUnmapBuffer(GL_UNIFORM_BUFFER);
+	}

 	~GSUniformBufferOGL() {
 		glDeleteBuffers(1, &buffer);
@ -97,6 +125,60 @@ struct GSInputLayout {
 	const GLvoid* offset;
 };

+struct GSVertexBufferState { // A vertex array object, its GL_ARRAY_BUFFER, and the vertex range last uploaded
+	size_t stride; // size in bytes of one vertex
+	size_t start;  // index of the first vertex of the current upload
+	size_t count;  // number of vertices in the current upload
+	size_t limit;  // capacity of the GL buffer, in vertices
+	GLuint vb;     // vertex buffer object
+	GLuint va;     // vertex array object
+	// Creates and binds both GL objects, allocates the storage, then records the attribute layout.
+	GSVertexBufferState(size_t stride, GSInputLayout* layout, uint32 layout_nbr) : stride(stride)
+								  , start(0), count(0), limit(0)
+	{
+		glGenBuffers(1, &vb);
+		glGenVertexArrays(1, &va);
+		bind();
+		allocate(60000); // Opengl works best with 1-4MB buffer. 60k element seems a good value.
+		set_internal_format(layout, layout_nbr);
+	}
+	// (Re)allocates room for new_limit vertices in the bound GL_ARRAY_BUFFER and rewinds start to 0.
+	void allocate(size_t new_limit)
+	{
+		start = 0;
+		limit = new_limit;
+		glBufferData(GL_ARRAY_BUFFER,  limit * stride, NULL, GL_STREAM_DRAW);
+	}
+	// Binds the vertex array, then the vertex buffer.
+	void bind()
+	{
+		glBindVertexArray(va);
+		glBindBuffer(GL_ARRAY_BUFFER, vb);
+	}
+	// Copies count vertices from src into the bound buffer at vertex index start; flags go to glMapBufferRange.
+	void upload(const void* src, uint32 flags)
+	{
+		uint8* dst = (uint8*) glMapBufferRange(GL_ARRAY_BUFFER, stride*start, stride*count, flags);
+		if (dst == NULL) return; // mapping failed: nothing to copy into and nothing to unmap
+		memcpy(dst, src, stride*count);
+		glUnmapBuffer(GL_ARRAY_BUFFER);
+	}
+	// Enables and describes each vertex attribute. Needs both a vertex array object and a GL_ARRAY_BUFFER buffer.
+	void set_internal_format(GSInputLayout* layout, uint32 layout_nbr)
+	{
+		for (uint32 i = 0; i < layout_nbr; i++) {
+			glEnableVertexAttribArray(layout[i].index);
+			glVertexAttribPointer(layout[i].index, layout[i].size, layout[i].type, GL_FALSE,  layout[i].stride, layout[i].offset);
+		}
+	}
+	// Deletes the GL buffer and vertex array.
+	~GSVertexBufferState()
+	{
+		glDeleteBuffers(1, &vb);
+		glDeleteVertexArrays(1, &va);
+	}
+};
+
 class GSDeviceOGL : public GSDevice
 {
 	uint32 m_msaa;				// Level of Msaa
@ -104,10 +186,10 @@ class GSDeviceOGL : public GSDevice
 	bool m_free_window;			
 	GSWnd* m_window;

-	GLuint m_vb;				// vertex buffer object
 	GLuint m_pipeline;			// pipeline to attach program shader
 	GLuint m_fbo;				// frame buffer container
-	uint32 m_sr_vb_offset;
+
+	GSVertexBufferState* m_vb_sr; // vb_state for StretchRect

 	struct {
 		GLuint ps[2];				 // program object
@ -121,12 +203,6 @@ class GSDeviceOGL : public GSDevice
 	} m_interlace;

 	struct {
-		// Hum I think this one is useless. As far as I understand
-		// it only get the index name of GLSL-equivalent input attribut 
-		// ??? CComPtr<ID3D11InputLayout> il;
-		//GSInputLayout il[2]; // description of the vertex array
-		GLuint va;		// vertex array object
-		GLuint vb;		// vertex buffer
 		GLuint vs;		// program object
 		GLuint ps[8];	// program object
 		GLuint ln;		// sampler object
@ -141,40 +217,12 @@ class GSDeviceOGL : public GSDevice
 		GSBlendStateOGL* bs;
 	} m_date;

-	// struct
-	// {
-	// 	ID3D11Buffer* vb;
-	// 	size_t vb_stride;
-	// 	ID3D11InputLayout* layout;
-	// 	D3D11_PRIMITIVE_TOPOLOGY topology;
-	// 	ID3D11VertexShader* vs;
-	// 	ID3D11Buffer* vs_cb;
-	// 	ID3D11GeometryShader* gs;
-	// 	ID3D11ShaderResourceView* ps_srv[3];
-	// 	ID3D11PixelShader* ps;
-	// 	ID3D11Buffer* ps_cb;
-	// 	ID3D11SamplerState* ps_ss[3];
-	// 	GSVector2i viewport;
-	// 	GSVector4i scissor;
-	// 	ID3D11DepthStencilState* dss;
-	// 	uint8 sref;
-	// 	ID3D11BlendState* bs;
-	// 	float bf;
-	// 	ID3D11RenderTargetView* rtv;
-	// 	ID3D11DepthStencilView* dsv;
-	// } m_state;
 	struct
 	{
-		GLuint vb;  // vertex buffer
-		// Hum I think those things can be dropped on OGL. It probably need an others architecture (see glVertexAttribPointer)
-		// size_t vb_stride;
-		// ID3D11InputLayout* layout;
-		//GSInputLayout* layout;
-		//uint32 layout_nbr;
-		GLuint va;  // vertex array
+		GSVertexBufferState* vb_state;
 		GLenum topology; // (ie GL_TRIANGLES...)
 		GLuint vs; // program
-		GLuint cb; // uniform current buffer
+		GSUniformBufferOGL* cb; // uniform current buffer
 		GLuint gs; // program
 		// FIXME texture binding. Maybe not equivalent for the state but the best I could find.
 		GSTextureOGL* ps_srv[3];
@ -197,7 +245,6 @@ class GSDeviceOGL : public GSDevice

 	bool m_srv_changed;
 	bool m_ss_changed;
-	//bool m_vb_changed;

 #if 0
 	CComPtr<ID3D11Device> m_dev;
@ -271,11 +318,11 @@ class GSDeviceOGL : public GSDevice

 		void CompileShaderFromSource(const std::string& glsl_file, const std::string& entry, GLenum type, GLuint* program);

+		void EndScene();
+
 		void IASetPrimitiveTopology(GLenum topology);
-		//void IASetInputLayout(GSInputLayout* layout, int layout_nbr);
-		void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
-		void IASetVertexBufferBind(GLuint vb);
-		void IASetVertexArrray(GLuint va);
+		void IASetVertexBuffer(const void* vertices, size_t count);
+		void IASetVertexState(GSVertexBufferState* vb_state);

 		void VSSetShader(GLuint vs);
 		void GSSetShader(GLuint gs);
--- a/plugins/GSdx/GSTextureOGL.cpp
+++ b/plugins/GSdx/GSTextureOGL.cpp
@ -157,7 +157,21 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, bool msaa, int format)

 GSTextureOGL::~GSTextureOGL()
 {
-	// glDeleteTextures or glDeleteRenderbuffers
+	glDeleteBuffers(1, &m_extra_buffer_id); // extra buffer object, released whatever the texture type
+	switch (m_type) { // m_texture_id is released with the call matching its object kind
+		case GSTexture::Texture:
+		case GSTexture::RenderTarget:
+			glDeleteTextures(1, &m_texture_id);
+			break;
+		case GSTexture::DepthStencil:
+			glDeleteRenderbuffers(1, &m_texture_id); // depth/stencil ids are deleted as renderbuffers
+			break;
+		case GSTexture::Offscreen:
+			assert(0); // NOTE(review): presumably Offscreen textures are not supported yet - confirm
+			break;
+		default:
+			break;
+	}
 }

 void GSTextureOGL::Attach(GLenum attachment)
@ -192,15 +206,19 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
 	// The case appears on SW mode. Src pitch is 2x dst pitch.
 	int rowbytes = r.width() << 2;
 	if (pitch != rowbytes) {
-		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_extra_buffer_id);
+		uint32 pbo_size = m_size.x * m_size.y * 4; // size in bytes of the whole PBO storage
+		uint32 map_flags = GL_MAP_WRITE_BIT; // the mapping is write-only

+		glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_extra_buffer_id);
 		if (!m_extra_buffer_allocated) {
-			glBufferData(GL_PIXEL_UNPACK_BUFFER, m_size.x * m_size.y * 4, NULL, GL_STREAM_DRAW);
+			glBufferData(GL_PIXEL_UNPACK_BUFFER, pbo_size, NULL, GL_STREAM_DRAW);
 			m_extra_buffer_allocated = true;
+		} else {
+			map_flags |= GL_MAP_INVALIDATE_BUFFER_BIT; // storage already exists: let the driver discard its previous contents
 		}

 		uint8* src = (uint8*) data;
-		uint8* dst = (uint8*) glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
+		uint8* dst = (uint8*) glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, pbo_size, map_flags);
 		for(int h = r.height(); h > 0; h--, src += pitch, dst += rowbytes)
 		{
 			memcpy(dst, src, rowbytes);