mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl:
* redo most of the texture upload (PBO): colin3 benchmark: 32 fps now (vs 26 fps 2 weeks ago) * use the cross vendor vsync extension on linux (previous wasn't supported by nvidia) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5721 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
07605941ef
commit
690432de30
|
@ -81,6 +81,10 @@ PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages = NULL;
|
|||
PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer = NULL;
|
||||
PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer = NULL;
|
||||
PFNGLBUFFERSUBDATAPROC gl_BufferSubData = NULL;
|
||||
PFNGLFENCESYNCPROC gl_FenceSync = NULL;
|
||||
PFNGLDELETESYNCPROC gl_DeleteSync = NULL;
|
||||
PFNGLCLIENTWAITSYNCPROC gl_ClientWaitSync = NULL;
|
||||
PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NULL;
|
||||
// GL4.0
|
||||
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
|
||||
// GL4.1
|
||||
|
|
|
@ -145,6 +145,10 @@ extern PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages;
|
|||
extern PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer;
|
||||
extern PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer;
|
||||
extern PFNGLBUFFERSUBDATAPROC gl_BufferSubData;
|
||||
extern PFNGLFENCESYNCPROC gl_FenceSync;
|
||||
extern PFNGLDELETESYNCPROC gl_DeleteSync;
|
||||
extern PFNGLCLIENTWAITSYNCPROC gl_ClientWaitSync;
|
||||
extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange;
|
||||
// GL4.0
|
||||
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
|
||||
// GL4.1
|
||||
|
|
|
@ -24,35 +24,144 @@
|
|||
#include "GSTextureOGL.h"
|
||||
#include "GLState.h"
|
||||
|
||||
// Flush need bind/unbind
|
||||
// Barrier might sync much more
|
||||
#define BARRIER_INSTEAD_FLUSH
|
||||
|
||||
namespace PboPool {
|
||||
|
||||
GLuint pool[8];
|
||||
uint32 current_pbo = 0;
|
||||
GLuint m_pool[PBO_POOL_SIZE];
|
||||
uint32 m_offset[PBO_POOL_SIZE];
|
||||
char* m_map[PBO_POOL_SIZE];
|
||||
uint32 m_current_pbo = 0;
|
||||
uint32 m_size;
|
||||
const uint32 m_pbo_size = (640*480*16) << 2;
|
||||
|
||||
// Create the pixel-unpack buffer objects of the pool and allocate their storage.
// NOTE(review): this text comes from a rendered diff; the lines using `pool`/`size`
// look like the removed side and the lines using `m_pool`/`m_pbo_size` like the
// added side -- confirm against the real file before relying on either.
void Init() {
|
||||
gl_GenBuffers(countof(pool), pool);
|
||||
// Generate one buffer name per slot of m_pool.
gl_GenBuffers(countof(m_pool), m_pool);
|
||||
|
||||
GLuint size = (640*480*16) << 2;
|
||||
|
||||
for (size_t i = 0; i < countof(pool); i++) {
|
||||
// Visit every slot once: BindPbo() binds the current slot, NextPbo() advances to the next.
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||
BindPbo();
|
||||
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW);
|
||||
|
||||
// With ARB_buffer_storage the buffer is allocated through gl_BufferStorage with
// write + persistent mapping flags; otherwise gl_BufferData is used.
if (GLLoader::found_GL_ARB_buffer_storage) {
|
||||
gl_BufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT | GL_CLIENT_STORAGE_BIT);
|
||||
} else {
|
||||
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW);
|
||||
// As written, the offset and the cached map pointer are only reset on this branch.
m_offset[m_current_pbo] = 0;
|
||||
m_map[m_current_pbo] = NULL;
|
||||
}
|
||||
|
||||
NextPbo();
|
||||
}
|
||||
// Leave no pool buffer bound to GL_PIXEL_UNPACK_BUFFER.
UnbindPbo();
|
||||
}
|
||||
|
||||
void MapAll() {
|
||||
if (m_map[m_current_pbo] != NULL) return;
|
||||
|
||||
// FIXME I'm not sure it is allowed to map another buffer after we get a pointer
|
||||
#ifdef BARRIER_INSTEAD_FLUSH
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
#else
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||
#endif
|
||||
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||
BindPbo();
|
||||
m_map[m_current_pbo] = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, flags);
|
||||
NextPbo();
|
||||
}
|
||||
UnbindPbo();
|
||||
}
|
||||
|
||||
// Reserve `size` bytes in the pool and return a CPU pointer to write them to.
// The requested size is remembered in m_size for Unmap()/EndTransfer().
// When the current buffer cannot hold the request, the pool moves to the next
// buffer and restarts at offset 0.
// A request that is too big for a whole buffer is only reported on stderr; the
// function still carries on.
char* Map(uint32 size) {
	m_size = size;

	const bool too_big = (m_size >= m_pbo_size);
	if (too_big)
		fprintf(stderr, "BUG: PBO too small %d but need %d\n", m_pbo_size, m_size);

	if (GLLoader::found_GL_ARB_buffer_storage) {
		// Persistent path: the buffers are mapped once, only the offset moves.
		MapAll();

		const bool wrap = (m_offset[m_current_pbo] + m_size >= m_pbo_size);
		if (wrap) {
			NextPbo();
			// Mark current pbo free
			m_offset[m_current_pbo] = 0;
		}

		char* base = m_map[m_current_pbo];
		return base + m_offset[m_current_pbo];
	}

	// Classic path: map only the range that is about to be written.
	const bool wrap = (m_offset[m_current_pbo] + m_size >= m_pbo_size);
	if (wrap) {
		NextPbo();
		// Mark current pbo free
		m_offset[m_current_pbo] = 0;
	}

	// A fresh buffer invalidates its whole content, otherwise only the mapped range.
	const GLbitfield invalidate = wrap ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_INVALIDATE_RANGE_BIT;
	const GLbitfield access = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | invalidate;

	// Pbo ready let's get a pointer
	BindPbo();

	void* ptr = gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size, access);
	return (char*)ptr;
}
|
||||
|
||||
void UnmapAll() {
|
||||
if (m_map[m_current_pbo] == NULL) return;
|
||||
|
||||
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||
BindPbo();
|
||||
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
m_map[m_current_pbo] == NULL;
|
||||
NextPbo();
|
||||
}
|
||||
UnbindPbo();
|
||||
}
|
||||
|
||||
void Unmap() {
|
||||
if (GLLoader::found_GL_ARB_buffer_storage) {
|
||||
// GL4.4 do a glMemoryBarrier? or glFlushMappedBufferRange?
|
||||
#ifdef BARRIER_INSTEAD_FLUSH
|
||||
gl_MemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||
#else
|
||||
BindPbo();
|
||||
gl_FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size);
|
||||
UnbindPbo();
|
||||
#endif
|
||||
} else {
|
||||
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
}
|
||||
}
|
||||
|
||||
// Byte offset of the current transfer inside the current pool buffer.
uint32 Offset() {
	const uint32 current_offset = m_offset[m_current_pbo];
	return current_offset;
}
|
||||
|
||||
// Delete the buffer objects of the pool.
// NOTE(review): this text comes from a rendered diff; the `pool` line looks like
// the removed side and the `m_pool` line like the added side -- confirm.
void Destroy() {
|
||||
gl_DeleteBuffers(countof(pool), pool);
|
||||
gl_DeleteBuffers(countof(m_pool), m_pool);
|
||||
}
|
||||
|
||||
// Bind the current pool buffer to the GL_PIXEL_UNPACK_BUFFER target.
// NOTE(review): this text comes from a rendered diff; the two lines using
// `pool`/`current_pbo` (bind, then advance the index) look like the removed side,
// and the `m_pool[m_current_pbo]` bind like the added side -- confirm.
void BindPbo() {
|
||||
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, pool[current_pbo]);
|
||||
current_pbo = (current_pbo + 1) & (countof(pool)-1);
|
||||
// This bind does not advance the index; NextPbo() does that separately.
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[m_current_pbo]);
|
||||
}
|
||||
|
||||
// Advance m_current_pbo to the next buffer of the pool, wrapping back to 0
// after the last one.
void NextPbo() {
	// Modulo instead of a "& (count - 1)" mask: the mask only wraps correctly
	// when the pool size is a power of two, whereas this works for any size
	// (and gives the same result for power-of-two sizes).
	m_current_pbo = (m_current_pbo + 1) % countof(m_pool);
}
|
||||
|
||||
void UnbindPbo() {
|
||||
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
void EndTransfer() {
|
||||
// Note: keep offset aligned for SSE/AVX
|
||||
m_offset[m_current_pbo] += (m_size + 64) & ~0x3F;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: check if it possible to always use those setup by default
|
||||
|
@ -210,27 +319,35 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
|
|||
|
||||
EnableUnit();
|
||||
|
||||
PboPool::BindPbo();
|
||||
|
||||
// Note: FGLRX crashes with the default path. It is happy with PBO. However not sure PBO are big enough for
|
||||
// big upscale
|
||||
// Note: with latest improvement, Pbo could be faster
|
||||
#if 1
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||
|
||||
char* map = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, (pitch * r.height()) << m_int_shift, GL_MAP_WRITE_BIT);
|
||||
char* src = (char*)data;
|
||||
uint32 line_size = r.width() << m_int_shift;
|
||||
char* src = (char*)data;
|
||||
char* map = PboPool::Map(r.height() * line_size);
|
||||
|
||||
for (uint32 h = r.height(); h > 0; h--) {
|
||||
memcpy(map, src, line_size);
|
||||
GSVector4i::storent(map, src, line_size);
|
||||
//memcpy(map, src, line_size);
|
||||
src += pitch;
|
||||
map += line_size;
|
||||
}
|
||||
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)0);
|
||||
PboPool::Unmap();
|
||||
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
|
||||
|
||||
if (!GLLoader::found_GL_ARB_buffer_storage)
|
||||
PboPool::UnbindPbo();
|
||||
|
||||
PboPool::EndTransfer();
|
||||
|
||||
return true;
|
||||
|
||||
#if 0
|
||||
#else
|
||||
|
||||
// pitch is in bytes whereas GL_UNPACK_ROW_LENGTH is in pixels
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||
|
|
|
@ -23,12 +23,20 @@
|
|||
|
||||
#include "GSTexture.h"
|
||||
|
||||
namespace PboPool {
|
||||
extern GLuint pool[8];
|
||||
extern uint32 current_pbo;
|
||||
// FIXME find the optimal number of PBO
|
||||
#define PBO_POOL_SIZE 4
|
||||
|
||||
namespace PboPool {
|
||||
void BindPbo();
|
||||
void UnbindPbo();
|
||||
void NextPbo();
|
||||
|
||||
char* Map(uint32 size);
|
||||
void MapAll();
|
||||
void Unmap();
|
||||
void UnmapAll();
|
||||
uint32 Offset();
|
||||
void EndTransfer();
|
||||
|
||||
void Init();
|
||||
void Destroy();
|
||||
|
|
|
@ -50,7 +50,9 @@ class GSBufferOGL {
|
|||
{
|
||||
gl_GenBuffers(1, &m_buffer);
|
||||
// Opengl works best with 1-4MB buffer.
|
||||
// Warning: m_limit is the number of objects (not the size in bytes)
|
||||
m_limit = 2 * 1024 * 1024 / m_stride;
|
||||
//m_limit = 512 * 1024 * m_stride;
|
||||
}
|
||||
|
||||
~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
|
||||
|
@ -75,9 +77,12 @@ class GSBufferOGL {
|
|||
|
||||
// Current GPU buffer is really too small need to allocate a new one
|
||||
if (m_count > m_limit) {
|
||||
//fprintf(stderr, "Allocate a new buffer\n %d", m_stride);
|
||||
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
||||
|
||||
} else if (m_count > (m_limit - m_start) ) {
|
||||
//fprintf(stderr, "Orphan the buffer %d\n", m_stride);
|
||||
|
||||
// Not enough left free room. Just go back at the beginning
|
||||
m_start = 0;
|
||||
// Orphan the buffer to avoid synchronization
|
||||
|
@ -91,8 +96,9 @@ class GSBufferOGL {
|
|||
{
|
||||
void* dst;
|
||||
if (Map(&dst, count)) {
|
||||
// FIXME which one to use
|
||||
// GSVector4i::storent(dst, src, m_count * m_stride);
|
||||
// FIXME which one to use. Note dst doesn't have any alignment guarantee
|
||||
// because it depends on the offset
|
||||
//GSVector4i::storent(dst, src, m_count * m_stride);
|
||||
memcpy(dst, src, m_stride*m_count);
|
||||
Unmap();
|
||||
}
|
||||
|
|
|
@ -82,6 +82,10 @@ void GSWndGL::PopulateGlFunction()
|
|||
*(void**)&(gl_VertexAttribIPointer) = GetProcAddress("glVertexAttribIPointer");
|
||||
*(void**)&(gl_VertexAttribPointer) = GetProcAddress("glVertexAttribPointer");
|
||||
*(void**)&(gl_BufferSubData) = GetProcAddress("glBufferSubData");
|
||||
*(void**)&(gl_FenceSync) = GetProcAddress("glFenceSync");
|
||||
*(void**)&(gl_DeleteSync) = GetProcAddress("glDeleteSync");
|
||||
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
|
||||
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
|
||||
// GL4.0
|
||||
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv");
|
||||
// GL4.1
|
||||
|
|
|
@ -144,8 +144,7 @@ bool GSWndOGL::Attach(void* handle, bool managed)
|
|||
|
||||
CheckContext();
|
||||
|
||||
m_swapinterval = (PFNGLXSWAPINTERVALMESAPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalMESA");
|
||||
//PFNGLXSWAPINTERVALMESAPROC m_swapinterval = (PFNGLXSWAPINTERVALMESAPROC)glXGetProcAddress((const GLubyte*) "glXSwapInterval");
|
||||
m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT");
|
||||
|
||||
PopulateGlFunction();
|
||||
|
||||
|
@ -192,6 +191,8 @@ bool GSWndOGL::Create(const string& title, int w, int h)
|
|||
|
||||
CheckContext();
|
||||
|
||||
m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT");
|
||||
|
||||
PopulateGlFunction();
|
||||
|
||||
return true;
|
||||
|
@ -258,7 +259,7 @@ void GSWndOGL::SetVSync(bool enable)
|
|||
// m_swapinterval uses an integer as parameter
|
||||
// 0 -> disable vsync
|
||||
// n -> wait n frame
|
||||
if (m_swapinterval) m_swapinterval((int)enable);
|
||||
if (m_swapinterval) m_swapinterval(m_NativeDisplay, m_NativeWindow, (int)enable);
|
||||
}
|
||||
|
||||
void GSWndOGL::Flip()
|
||||
|
|
|
@ -31,7 +31,7 @@ class GSWndOGL : public GSWndGL
|
|||
Display* m_NativeDisplay;
|
||||
GLXContext m_context;
|
||||
|
||||
PFNGLXSWAPINTERVALMESAPROC m_swapinterval;
|
||||
PFNGLXSWAPINTERVALEXTPROC m_swapinterval;
|
||||
|
||||
void CreateContext(int major, int minor);
|
||||
void CheckContext();
|
||||
|
|
Loading…
Reference in New Issue