mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: add some fences to protect the upload of vbo buffer
This way ogl_vertex_storage must be safer to activate And it brings a nice performance boost (game with lots of primitives and reasonable upscaling) SotC testcase 4x: 61fps => 78fps
This commit is contained in:
parent
36514bd95f
commit
7b0775d887
|
@ -44,6 +44,7 @@ class GSBufferOGL {
|
|||
GLuint m_buffer_name;
|
||||
uint8* m_buffer_ptr;
|
||||
const bool m_buffer_storage;
|
||||
GLsync m_fence[5];
|
||||
|
||||
public:
|
||||
GSBufferOGL(GLenum target, size_t stride) :
|
||||
|
@ -60,14 +61,18 @@ class GSBufferOGL {
|
|||
m_limit = 2 * 2 * 1024 * 1024 / m_stride;
|
||||
|
||||
if (m_buffer_storage) {
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
m_fence[i] = 0;
|
||||
}
|
||||
|
||||
// TODO: if we do manually the synchronization, I'm not sure size is important. It worths to investigate it.
|
||||
// => bigger buffer => less sync
|
||||
#ifndef ENABLE_GLES
|
||||
bind();
|
||||
// coherency will be done by flushing
|
||||
const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||
const GLbitfield create_flags = common_flags
|
||||
// | GL_CLIENT_STORAGE_BIT
|
||||
;
|
||||
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
|
||||
|
||||
gl_BufferStorage(m_target, m_stride*m_limit, NULL, create_flags );
|
||||
m_buffer_ptr = (uint8*) gl_MapBufferRange(m_target, 0, m_stride*m_limit, map_flags);
|
||||
|
@ -79,6 +84,9 @@ class GSBufferOGL {
|
|||
|
||||
~GSBufferOGL() {
|
||||
if (m_buffer_storage) {
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
gl_DeleteSync(m_fence[i]);
|
||||
}
|
||||
bind();
|
||||
gl_UnmapBuffer(m_target);
|
||||
}
|
||||
|
@ -101,10 +109,8 @@ class GSBufferOGL {
|
|||
gl_BindBuffer(m_target, m_buffer_name);
|
||||
}
|
||||
|
||||
void subdata_upload(const void* src, uint32 count)
|
||||
void subdata_upload(const void* src)
|
||||
{
|
||||
m_count = count;
|
||||
|
||||
// Current GPU buffer is really too small need to allocate a new one
|
||||
if (m_count > m_limit) {
|
||||
//fprintf(stderr, "Allocate a new buffer\n %d", m_stride);
|
||||
|
@ -122,13 +128,13 @@ class GSBufferOGL {
|
|||
gl_BufferSubData(m_target, m_stride * m_start, m_stride * m_count, src);
|
||||
}
|
||||
|
||||
void map_upload(const void* src, uint32 count)
|
||||
void map_upload(const void* src)
|
||||
{
|
||||
void* dst;
|
||||
|
||||
m_count = count;
|
||||
size_t offset = m_start*m_stride;
|
||||
size_t length = m_count*m_stride;
|
||||
//fprintf(stderr, "Upload from %x offset %x bytes (%x)\n", offset, length, m_target);
|
||||
|
||||
// Get the pointer of the buffer
|
||||
{
|
||||
|
@ -137,29 +143,67 @@ class GSBufferOGL {
|
|||
fprintf(stderr, "Buffer (%x) too small! Please report it upstream\n", m_target);
|
||||
ASSERT(0);
|
||||
} else if (m_count > (m_limit - m_start) ) {
|
||||
//fprintf(stderr, "Wrap buffer (%x)\n", m_target);
|
||||
size_t current_chunk = offset >> 20;
|
||||
#ifdef ENABLE_OGL_DEBUG_FENCE
|
||||
fprintf(stderr, "%x: Wrap buffer\n", m_target);
|
||||
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, current_chunk);
|
||||
#endif
|
||||
ASSERT(current_chunk > 0 && current_chunk < 5);
|
||||
if (m_fence[current_chunk] == 0) {
|
||||
m_fence[current_chunk] = gl_FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
}
|
||||
|
||||
// Wrap at startup
|
||||
m_start = 0;
|
||||
offset = 0;
|
||||
|
||||
// Only check first chunk
|
||||
if (m_fence[0]) {
|
||||
#ifdef ENABLE_OGL_DEBUG_FENCE
|
||||
GLenum status = gl_ClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
if (status != GL_ALREADY_SIGNALED) {
|
||||
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
|
||||
}
|
||||
#else
|
||||
gl_ClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
#endif
|
||||
gl_DeleteSync(m_fence[0]);
|
||||
m_fence[0] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Protect buffer with fences
|
||||
size_t current_chunk = offset >> 20;
|
||||
size_t next_chunk = (offset + length) >> 20;
|
||||
for (size_t c = current_chunk + 1; c <= next_chunk; c++) {
|
||||
#ifdef ENABLE_OGL_DEBUG_FENCE
|
||||
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c-1);
|
||||
#endif
|
||||
ASSERT(c > 0 && c < 5);
|
||||
m_fence[c-1] = gl_FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
if (m_fence[c]) {
|
||||
#ifdef ENABLE_OGL_DEBUG_FENCE
|
||||
GLenum status = gl_ClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
#else
|
||||
gl_ClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
#endif
|
||||
gl_DeleteSync(m_fence[c]);
|
||||
m_fence[c] = 0;
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG_FENCE
|
||||
if (status != GL_ALREADY_SIGNALED) {
|
||||
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
dst = m_buffer_ptr + offset;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// FIXME which one to use. Note dst doesn't have any aligment guarantee
|
||||
// because it depends of the offset
|
||||
if (m_target == GL_ARRAY_BUFFER) {
|
||||
GSVector4i::storent(dst, src, length);
|
||||
} else {
|
||||
memcpy(dst, src, length);
|
||||
}
|
||||
#else
|
||||
memcpy(dst, src, length);
|
||||
#endif
|
||||
#if 1
|
||||
gl_FlushMappedBufferRange(m_target, offset, length);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ENABLE_GLES
|
||||
|
@ -180,10 +224,13 @@ class GSBufferOGL {
|
|||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
g_vertex_upload_byte += count*m_stride;
|
||||
#endif
|
||||
|
||||
m_count = count;
|
||||
|
||||
if (m_buffer_storage) {
|
||||
map_upload(src, count);
|
||||
map_upload(src);
|
||||
} else {
|
||||
subdata_upload(src, count);
|
||||
subdata_upload(src);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
|
||||
#if defined(_DEBUG) || defined(_DEVEL)
|
||||
#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states.
|
||||
//#define ENABLE_OGL_DEBUG_FENCE
|
||||
#endif
|
||||
//#define ENABLE_OGL_DEBUG_MEM_BW // compute the quantity of data transfered (debug purpose)
|
||||
|
||||
|
|
Loading…
Reference in New Issue