mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: improve speed of vertex streaming
Not yet enabled because I'm afraid of data corruption, but feel free to test it. The option: ogl_vertex_storage = 1. Performance note (warm cache + gs replay on colin3): 60 fps -> 76 fps.
This commit is contained in:
parent
62489f42f1
commit
ce98276322
|
@ -415,6 +415,9 @@ void GSDeviceOGL::BeforeDraw()
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
ASSERT(gl_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
ASSERT(gl_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||||
#endif
|
#endif
|
||||||
|
// Ensure VBOs are uploaded
|
||||||
|
if (GLLoader::found_GL_ARB_buffer_storage)
|
||||||
|
Barrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||||
|
|
||||||
//#ifdef ENABLE_OGL_STENCIL_DEBUG
|
//#ifdef ENABLE_OGL_STENCIL_DEBUG
|
||||||
// if (m_date.t)
|
// if (m_date.t)
|
||||||
|
|
|
@ -42,7 +42,6 @@ class GSBufferOGL {
|
||||||
size_t m_limit;
|
size_t m_limit;
|
||||||
const GLenum m_target;
|
const GLenum m_target;
|
||||||
GLuint m_buffer_name;
|
GLuint m_buffer_name;
|
||||||
const bool m_sub_data_config;
|
|
||||||
uint8* m_buffer_ptr;
|
uint8* m_buffer_ptr;
|
||||||
const bool m_buffer_storage;
|
const bool m_buffer_storage;
|
||||||
|
|
||||||
|
@ -53,7 +52,6 @@ class GSBufferOGL {
|
||||||
, m_count(0)
|
, m_count(0)
|
||||||
, m_limit(0)
|
, m_limit(0)
|
||||||
, m_target(target)
|
, m_target(target)
|
||||||
, m_sub_data_config(theApp.GetConfig("ogl_vertex_subdata", 1) != 0)
|
|
||||||
, m_buffer_storage((theApp.GetConfig("ogl_vertex_storage", 0) == 1) && GLLoader::found_GL_ARB_buffer_storage)
|
, m_buffer_storage((theApp.GetConfig("ogl_vertex_storage", 0) == 1) && GLLoader::found_GL_ARB_buffer_storage)
|
||||||
{
|
{
|
||||||
gl_GenBuffers(1, &m_buffer_name);
|
gl_GenBuffers(1, &m_buffer_name);
|
||||||
|
@ -65,8 +63,14 @@ class GSBufferOGL {
|
||||||
#ifndef ENABLE_GLES
|
#ifndef ENABLE_GLES
|
||||||
bind();
|
bind();
|
||||||
// FIXME do I need the dynamic
|
// FIXME do I need the dynamic
|
||||||
const GLbitfield map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
const GLbitfield map_flags = GL_MAP_WRITE_BIT
|
||||||
const GLbitfield create_flags = map_flags | GL_DYNAMIC_STORAGE_BIT;
|
| GL_MAP_PERSISTENT_BIT
|
||||||
|
// | GL_MAP_COHERENT_BIT (see barrier in GSDeviceOGL::BeforeDraw)
|
||||||
|
| GL_MAP_INVALIDATE_RANGE_BIT
|
||||||
|
;
|
||||||
|
const GLbitfield create_flags = map_flags
|
||||||
|
// | GL_CLIENT_STORAGE_BIT
|
||||||
|
;
|
||||||
|
|
||||||
gl_BufferStorage(m_target, m_stride*m_limit, NULL, create_flags );
|
gl_BufferStorage(m_target, m_stride*m_limit, NULL, create_flags );
|
||||||
m_buffer_ptr = (uint8*) gl_MapBufferRange(m_target, 0, m_stride*m_limit, map_flags);
|
m_buffer_ptr = (uint8*) gl_MapBufferRange(m_target, 0, m_stride*m_limit, map_flags);
|
||||||
|
@ -124,19 +128,35 @@ class GSBufferOGL {
|
||||||
void map_upload(const void* src, uint32 count)
|
void map_upload(const void* src, uint32 count)
|
||||||
{
|
{
|
||||||
void* dst;
|
void* dst;
|
||||||
if (Map(&dst, count)) {
|
|
||||||
#if 0
|
m_count = count;
|
||||||
// FIXME which one to use. Note dst doesn't have any aligment guarantee
|
|
||||||
// because it depends of the offset
|
// Get the pointer of the buffer
|
||||||
if (m_target == GL_ARRAY_BUFFER) {
|
{
|
||||||
GSVector4i::storent(dst, src, m_count * m_stride);
|
// It would need some protection of the data. For the moment finger cross!
|
||||||
} else {
|
if (m_count > m_limit) {
|
||||||
memcpy(dst, src, m_stride*m_count);
|
fprintf(stderr, "Buffer (%x) too small! Please report it upstream\n", m_target);
|
||||||
|
ASSERT(0);
|
||||||
|
} else if (m_count > (m_limit - m_start) ) {
|
||||||
|
//fprintf(stderr, "Wrap buffer (%x)\n", m_target);
|
||||||
|
// Wrap at startup
|
||||||
|
m_start = 0;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
memcpy(dst, src, m_stride*m_count);
|
dst = m_buffer_ptr + m_start*m_stride;
|
||||||
Unmap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// FIXME which one to use. Note dst doesn't have any aligment guarantee
|
||||||
|
// because it depends of the offset
|
||||||
|
if (m_target == GL_ARRAY_BUFFER) {
|
||||||
|
GSVector4i::storent(dst, src, m_count * m_stride);
|
||||||
|
} else {
|
||||||
|
memcpy(dst, src, m_stride*m_count);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
memcpy(dst, src, m_stride*m_count);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ENABLE_GLES
|
#ifdef ENABLE_GLES
|
||||||
|
@ -157,61 +177,11 @@ class GSBufferOGL {
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
g_vertex_upload_byte += count*m_stride;
|
g_vertex_upload_byte += count*m_stride;
|
||||||
#endif
|
#endif
|
||||||
if (m_sub_data_config && !m_buffer_storage) {
|
|
||||||
subdata_upload(src, count);
|
|
||||||
} else {
|
|
||||||
map_upload(src, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Map(void** pointer, uint32 count ) {
|
|
||||||
m_count = count;
|
|
||||||
|
|
||||||
if (m_buffer_storage) {
|
if (m_buffer_storage) {
|
||||||
// It would need some protection of the data. For the moment finger cross!
|
map_upload(src, count);
|
||||||
|
|
||||||
if (m_count > m_limit) {
|
|
||||||
fprintf(stderr, "Buffer (%x) too small! Please report it upstream\n", m_target);
|
|
||||||
ASSERT(0);
|
|
||||||
} else if (m_count > (m_limit - m_start) ) {
|
|
||||||
//fprintf(stderr, "Wrap buffer (%x)\n", m_target);
|
|
||||||
// Wrap at startup
|
|
||||||
m_start = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pointer = m_buffer_ptr + m_start*m_stride;
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Note: For an explanation of the map flag
|
subdata_upload(src, count);
|
||||||
// see http://www.opengl.org/wiki/Buffer_Object_Streaming
|
|
||||||
uint32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
|
|
||||||
|
|
||||||
// Current GPU buffer is really too small need to allocate a new one
|
|
||||||
if (m_count > m_limit) {
|
|
||||||
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
|
||||||
|
|
||||||
} else if (m_count > (m_limit - m_start) ) {
|
|
||||||
// Not enough left free room. Just go back at the beginning
|
|
||||||
m_start = 0;
|
|
||||||
|
|
||||||
// Tell the driver that it can orphan previous buffer and restart from a scratch buffer.
|
|
||||||
// Technically the buffer will not be accessible by the application anymore but the
|
|
||||||
// GL will effectively remove it when draws call are finised.
|
|
||||||
map_flags |= GL_MAP_INVALIDATE_BUFFER_BIT;
|
|
||||||
} else {
|
|
||||||
// Tell the driver that it doesn't need to contain any valid buffer data, and that you promise to write the entire range you map
|
|
||||||
map_flags |= GL_MAP_INVALIDATE_RANGE_BIT;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Upload the data to the buffer
|
|
||||||
*pointer = (uint8*) gl_MapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Unmap() {
|
|
||||||
if (!m_buffer_storage) gl_UnmapBuffer(m_target);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EndScene()
|
void EndScene()
|
||||||
|
|
Loading…
Reference in New Issue