Merge pull request #1414 from PCSX2/gsdx-single-pbo

gsdx ogl: replace eight 8MB PBOs with a single fat 64MB PBO
This commit is contained in:
Gregory Hainaut 2016-06-19 13:40:38 +02:00 committed by GitHub
commit 41c522104e
2 changed files with 64 additions and 63 deletions

View File

@@ -35,13 +35,14 @@ extern uint64 g_real_texture_upload_byte;
// FIXME OGL4: investigate, only 1 unpack buffer always bound // FIXME OGL4: investigate, only 1 unpack buffer always bound
namespace PboPool { namespace PboPool {
GLuint m_pool[PBO_POOL_SIZE]; const uint32 m_pbo_size = 64*1024*1024;
uptr m_offset[PBO_POOL_SIZE]; const uint32 m_seg_size = 16*1024*1024;
char* m_map[PBO_POOL_SIZE];
uint32 m_current_pbo = 0; GLuint m_buffer;
uptr m_offset;
char* m_map;
uint32 m_size; uint32 m_size;
GLsync m_fence[PBO_POOL_SIZE]; GLsync m_fence[m_pbo_size/m_seg_size];
const uint32 m_pbo_size = 8*1024*1024;
// Option for buffer storage // Option for buffer storage
// XXX: actually does I really need coherent and barrier??? // XXX: actually does I really need coherent and barrier???
@@ -52,20 +53,20 @@ namespace PboPool {
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
void Init() { void Init() {
glGenBuffers(countof(m_pool), m_pool); glGenBuffers(1, &m_buffer);
for (size_t i = 0; i < countof(m_pool); i++) { BindPbo();
BindPbo();
string pretty_name = "PBO" + to_string(i); glObjectLabel(GL_BUFFER, m_buffer, -1, "PBO");
glObjectLabel(GL_BUFFER, m_pool[i], pretty_name.size(), pretty_name.c_str());
glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags); glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags);
m_map[m_current_pbo] = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags); m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
m_fence[m_current_pbo] = 0; m_offset = 0;
NextPbo(); for (size_t i = 0; i < countof(m_fence); i++) {
m_fence[i] = 0;
} }
UnbindPbo(); UnbindPbo();
} }
@@ -77,69 +78,70 @@ namespace PboPool {
fprintf(stderr, "BUG: PBO too small %d but need %d\n", m_pbo_size, m_size); fprintf(stderr, "BUG: PBO too small %d but need %d\n", m_pbo_size, m_size);
} }
if (m_offset[m_current_pbo] + m_size >= m_pbo_size) {
//NextPbo(); // For test purpose
NextPboWithSync();
}
// Note: texsubimage will access currently bound buffer // Note: texsubimage will access currently bound buffer
// Pbo ready let's get a pointer // Pbo ready let's get a pointer
BindPbo(); BindPbo();
map = m_map[m_current_pbo] + m_offset[m_current_pbo]; Sync();
map = m_map + m_offset;
return map; return map;
} }
void Unmap() { void Unmap() {
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size);
} }
uptr Offset() { uptr Offset() {
return m_offset[m_current_pbo]; return m_offset;
} }
void Destroy() { void Destroy() {
for (size_t i = 0; i < countof(m_pool); i++) { m_map = NULL;
m_map[i] = NULL; m_offset = 0;
m_offset[i] = 0;
glDeleteSync(m_fence[i]);
// Don't know if we must do it // Don't know if we must do it
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[i]); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glDeleteBuffers(countof(m_pool), m_pool);
for (size_t i = 0; i < countof(m_fence); i++) {
glDeleteSync(m_fence[i]);
}
glDeleteBuffers(1, &m_buffer);
} }
void BindPbo() { void BindPbo() {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[m_current_pbo]); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
} }
void NextPbo() { void Sync() {
m_current_pbo = (m_current_pbo + 1) & (countof(m_pool)-1); uint32 segment_current = m_offset / m_seg_size;
// Mark new PBO as free uint32 segment_next = (m_offset + m_size) / m_seg_size;
m_offset[m_current_pbo] = 0;
}
void NextPboWithSync() { if (segment_current != segment_next) {
m_fence[m_current_pbo] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); if (segment_next >= countof(m_fence)) {
NextPbo(); segment_next = 0;
if (m_fence[m_current_pbo]) { }
#ifdef ENABLE_OGL_DEBUG_FENCE // Align current transfer on the start of the segment
GLenum status = glClientWaitSync(m_fence[m_current_pbo], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); m_offset = m_seg_size * segment_next;
#else
glClientWaitSync(m_fence[m_current_pbo], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); // protect the left segment
#endif m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glDeleteSync(m_fence[m_current_pbo]);
m_fence[m_current_pbo] = 0; // Check next segment is free
if (m_fence[segment_next]) {
#ifdef ENABLE_OGL_DEBUG_FENCE GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
if (status != GL_ALREADY_SIGNALED) { // Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED
fprintf(stderr, "GL_PIXEL_UNPACK_BUFFER: Sync Sync! Buffer too small\n"); if (status != GL_ALREADY_SIGNALED) {
GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status);
}
glDeleteSync(m_fence[segment_next]);
m_fence[segment_next] = 0;
} }
#endif
} }
} }
@@ -149,7 +151,7 @@ namespace PboPool {
void EndTransfer() { void EndTransfer() {
// Note: keep offset aligned for SSE/AVX // Note: keep offset aligned for SSE/AVX
m_offset[m_current_pbo] = (m_offset[m_current_pbo] + m_size + 63) & ~0x3F; m_offset += (m_size + 63) & ~0x3F;
} }
} }

View File

@@ -24,15 +24,14 @@
#include "GSTexture.h" #include "GSTexture.h"
namespace PboPool { namespace PboPool {
void BindPbo(); inline void BindPbo();
void UnbindPbo(); inline void UnbindPbo();
void NextPbo(); inline void Sync();
void NextPboWithSync();
char* Map(uint32 size); inline char* Map(uint32 size);
void Unmap(); inline void Unmap();
uptr Offset(); inline uptr Offset();
void EndTransfer(); inline void EndTransfer();
void Init(); void Init();
void Destroy(); void Destroy();