mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl:
* redo most of the texture upload (PBO): colin3 benchmark: 32 fps now (vs 26 fps 2 weeks ago) * use the cross vendor vsync extension on linux (previous wasn't supported by nvidia) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5721 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
07605941ef
commit
690432de30
|
@ -81,6 +81,10 @@ PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages = NULL;
|
||||||
PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer = NULL;
|
PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer = NULL;
|
||||||
PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer = NULL;
|
PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer = NULL;
|
||||||
PFNGLBUFFERSUBDATAPROC gl_BufferSubData = NULL;
|
PFNGLBUFFERSUBDATAPROC gl_BufferSubData = NULL;
|
||||||
|
PFNGLFENCESYNCPROC gl_FenceSync = NULL;
|
||||||
|
PFNGLDELETESYNCPROC gl_DeleteSync = NULL;
|
||||||
|
PFNGLCLIENTWAITSYNCPROC gl_ClientWaitSync = NULL;
|
||||||
|
PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NULL;
|
||||||
// GL4.0
|
// GL4.0
|
||||||
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
|
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
|
||||||
// GL4.1
|
// GL4.1
|
||||||
|
|
|
@ -145,6 +145,10 @@ extern PFNGLUSEPROGRAMSTAGESPROC gl_UseProgramStages;
|
||||||
extern PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer;
|
extern PFNGLVERTEXATTRIBIPOINTERPROC gl_VertexAttribIPointer;
|
||||||
extern PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer;
|
extern PFNGLVERTEXATTRIBPOINTERPROC gl_VertexAttribPointer;
|
||||||
extern PFNGLBUFFERSUBDATAPROC gl_BufferSubData;
|
extern PFNGLBUFFERSUBDATAPROC gl_BufferSubData;
|
||||||
|
extern PFNGLFENCESYNCPROC gl_FenceSync;
|
||||||
|
extern PFNGLDELETESYNCPROC gl_DeleteSync;
|
||||||
|
extern PFNGLCLIENTWAITSYNCPROC gl_ClientWaitSync;
|
||||||
|
extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange;
|
||||||
// GL4.0
|
// GL4.0
|
||||||
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
|
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
|
||||||
// GL4.1
|
// GL4.1
|
||||||
|
|
|
@ -24,35 +24,144 @@
|
||||||
#include "GSTextureOGL.h"
|
#include "GSTextureOGL.h"
|
||||||
#include "GLState.h"
|
#include "GLState.h"
|
||||||
|
|
||||||
|
// Flush need bind/unbind
|
||||||
|
// Barrier might sync much more
|
||||||
|
#define BARRIER_INSTEAD_FLUSH
|
||||||
|
|
||||||
namespace PboPool {
|
namespace PboPool {
|
||||||
|
|
||||||
GLuint pool[8];
|
GLuint m_pool[PBO_POOL_SIZE];
|
||||||
uint32 current_pbo = 0;
|
uint32 m_offset[PBO_POOL_SIZE];
|
||||||
|
char* m_map[PBO_POOL_SIZE];
|
||||||
|
uint32 m_current_pbo = 0;
|
||||||
|
uint32 m_size;
|
||||||
|
const uint32 m_pbo_size = (640*480*16) << 2;
|
||||||
|
|
||||||
void Init() {
|
void Init() {
|
||||||
gl_GenBuffers(countof(pool), pool);
|
gl_GenBuffers(countof(m_pool), m_pool);
|
||||||
|
|
||||||
GLuint size = (640*480*16) << 2;
|
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||||
|
|
||||||
for (size_t i = 0; i < countof(pool); i++) {
|
|
||||||
BindPbo();
|
BindPbo();
|
||||||
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW);
|
|
||||||
|
if (GLLoader::found_GL_ARB_buffer_storage) {
|
||||||
|
gl_BufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT | GL_CLIENT_STORAGE_BIT);
|
||||||
|
} else {
|
||||||
|
gl_BufferData(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, GL_STREAM_DRAW);
|
||||||
|
m_offset[m_current_pbo] = 0;
|
||||||
|
m_map[m_current_pbo] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
NextPbo();
|
||||||
}
|
}
|
||||||
UnbindPbo();
|
UnbindPbo();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MapAll() {
|
||||||
|
if (m_map[m_current_pbo] != NULL) return;
|
||||||
|
|
||||||
|
// FIXME I'm not sure it is allowed to map another buffer after we get a pointer
|
||||||
|
#ifdef BARRIER_INSTEAD_FLUSH
|
||||||
|
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT;
|
||||||
|
#else
|
||||||
|
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||||
|
#endif
|
||||||
|
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||||
|
BindPbo();
|
||||||
|
m_map[m_current_pbo] = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, flags);
|
||||||
|
NextPbo();
|
||||||
|
}
|
||||||
|
UnbindPbo();
|
||||||
|
}
|
||||||
|
|
||||||
|
char* Map(uint32 size) {
|
||||||
|
m_size = size;
|
||||||
|
|
||||||
|
if (m_size >= m_pbo_size) {
|
||||||
|
fprintf(stderr, "BUG: PBO too small %d but need %d\n", m_pbo_size, m_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!GLLoader::found_GL_ARB_buffer_storage) {
|
||||||
|
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_RANGE_BIT;
|
||||||
|
|
||||||
|
if (m_offset[m_current_pbo] + m_size >= m_pbo_size) {
|
||||||
|
NextPbo();
|
||||||
|
|
||||||
|
// Mark current pbo free
|
||||||
|
m_offset[m_current_pbo] = 0;
|
||||||
|
|
||||||
|
flags &= ~GL_MAP_INVALIDATE_RANGE_BIT;
|
||||||
|
flags |= GL_MAP_INVALIDATE_BUFFER_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pbo ready let's get a pointer
|
||||||
|
BindPbo();
|
||||||
|
|
||||||
|
return (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size, flags);
|
||||||
|
} else {
|
||||||
|
MapAll();
|
||||||
|
|
||||||
|
if (m_offset[m_current_pbo] + m_size >= m_pbo_size) {
|
||||||
|
NextPbo();
|
||||||
|
|
||||||
|
// Mark current pbo free
|
||||||
|
m_offset[m_current_pbo] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return m_map[m_current_pbo] + m_offset[m_current_pbo];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void UnmapAll() {
|
||||||
|
if (m_map[m_current_pbo] == NULL) return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < countof(m_pool); i++) {
|
||||||
|
BindPbo();
|
||||||
|
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||||
|
m_map[m_current_pbo] == NULL;
|
||||||
|
NextPbo();
|
||||||
|
}
|
||||||
|
UnbindPbo();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Unmap() {
|
||||||
|
if (GLLoader::found_GL_ARB_buffer_storage) {
|
||||||
|
// GL4.4 do a glMemoryBarrier? or glFlushMappedBufferRange?
|
||||||
|
#ifdef BARRIER_INSTEAD_FLUSH
|
||||||
|
gl_MemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||||
|
#else
|
||||||
|
BindPbo();
|
||||||
|
gl_FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset[m_current_pbo], m_size);
|
||||||
|
UnbindPbo();
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32 Offset() {
|
||||||
|
return m_offset[m_current_pbo];
|
||||||
|
}
|
||||||
|
|
||||||
void Destroy() {
|
void Destroy() {
|
||||||
gl_DeleteBuffers(countof(pool), pool);
|
gl_DeleteBuffers(countof(m_pool), m_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BindPbo() {
|
void BindPbo() {
|
||||||
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, pool[current_pbo]);
|
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pool[m_current_pbo]);
|
||||||
current_pbo = (current_pbo + 1) & (countof(pool)-1);
|
}
|
||||||
|
|
||||||
|
void NextPbo() {
|
||||||
|
m_current_pbo = (m_current_pbo + 1) & (countof(m_pool)-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnbindPbo() {
|
void UnbindPbo() {
|
||||||
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
gl_BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EndTransfer() {
|
||||||
|
// Note: keep offset aligned for SSE/AVX
|
||||||
|
m_offset[m_current_pbo] += (m_size + 64) & ~0x3F;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: check if it possible to always use those setup by default
|
// FIXME: check if it possible to always use those setup by default
|
||||||
|
@ -210,27 +319,35 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
|
||||||
|
|
||||||
EnableUnit();
|
EnableUnit();
|
||||||
|
|
||||||
PboPool::BindPbo();
|
// Note: FGLRX crashes with the default path. It is happy with PBO. However not sure PBO are big enough for
|
||||||
|
// big upscale
|
||||||
|
// Note: with latest improvement, Pbo could be faster
|
||||||
|
#if 1
|
||||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||||
|
|
||||||
char* map = (char*)gl_MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, (pitch * r.height()) << m_int_shift, GL_MAP_WRITE_BIT);
|
|
||||||
char* src = (char*)data;
|
|
||||||
uint32 line_size = r.width() << m_int_shift;
|
uint32 line_size = r.width() << m_int_shift;
|
||||||
|
char* src = (char*)data;
|
||||||
|
char* map = PboPool::Map(r.height() * line_size);
|
||||||
|
|
||||||
for (uint32 h = r.height(); h > 0; h--) {
|
for (uint32 h = r.height(); h > 0; h--) {
|
||||||
memcpy(map, src, line_size);
|
GSVector4i::storent(map, src, line_size);
|
||||||
|
//memcpy(map, src, line_size);
|
||||||
src += pitch;
|
src += pitch;
|
||||||
map += line_size;
|
map += line_size;
|
||||||
}
|
}
|
||||||
gl_UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
|
||||||
|
|
||||||
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)0);
|
PboPool::Unmap();
|
||||||
|
|
||||||
|
glTexSubImage2D(GL_TEXTURE_2D, 0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
|
||||||
|
|
||||||
|
if (!GLLoader::found_GL_ARB_buffer_storage)
|
||||||
PboPool::UnbindPbo();
|
PboPool::UnbindPbo();
|
||||||
|
|
||||||
|
PboPool::EndTransfer();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
#if 0
|
#else
|
||||||
|
|
||||||
// pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel
|
// pitch is in byte wherease GL_UNPACK_ROW_LENGTH is in pixel
|
||||||
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
glPixelStorei(GL_UNPACK_ALIGNMENT, m_int_alignment);
|
||||||
|
|
|
@ -23,12 +23,20 @@
|
||||||
|
|
||||||
#include "GSTexture.h"
|
#include "GSTexture.h"
|
||||||
|
|
||||||
namespace PboPool {
|
// FIXME find the optimal number of PBO
|
||||||
extern GLuint pool[8];
|
#define PBO_POOL_SIZE 4
|
||||||
extern uint32 current_pbo;
|
|
||||||
|
|
||||||
|
namespace PboPool {
|
||||||
void BindPbo();
|
void BindPbo();
|
||||||
void UnbindPbo();
|
void UnbindPbo();
|
||||||
|
void NextPbo();
|
||||||
|
|
||||||
|
char* Map(uint32 size);
|
||||||
|
void MapAll();
|
||||||
|
void Unmap();
|
||||||
|
void UnmapAll();
|
||||||
|
uint32 Offset();
|
||||||
|
void EndTransfer();
|
||||||
|
|
||||||
void Init();
|
void Init();
|
||||||
void Destroy();
|
void Destroy();
|
||||||
|
|
|
@ -50,7 +50,9 @@ class GSBufferOGL {
|
||||||
{
|
{
|
||||||
gl_GenBuffers(1, &m_buffer);
|
gl_GenBuffers(1, &m_buffer);
|
||||||
// Opengl works best with 1-4MB buffer.
|
// Opengl works best with 1-4MB buffer.
|
||||||
|
// Warning m_limit is the number of object (not the size in Bytes)
|
||||||
m_limit = 2 * 1024 * 1024 / m_stride;
|
m_limit = 2 * 1024 * 1024 / m_stride;
|
||||||
|
//m_limit = 512 * 1024 * m_stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
|
~GSBufferOGL() { gl_DeleteBuffers(1, &m_buffer); }
|
||||||
|
@ -75,9 +77,12 @@ class GSBufferOGL {
|
||||||
|
|
||||||
// Current GPU buffer is really too small need to allocate a new one
|
// Current GPU buffer is really too small need to allocate a new one
|
||||||
if (m_count > m_limit) {
|
if (m_count > m_limit) {
|
||||||
|
//fprintf(stderr, "Allocate a new buffer\n %d", m_stride);
|
||||||
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
allocate(std::max<int>(m_count * 3 / 2, m_limit));
|
||||||
|
|
||||||
} else if (m_count > (m_limit - m_start) ) {
|
} else if (m_count > (m_limit - m_start) ) {
|
||||||
|
//fprintf(stderr, "Orphan the buffer %d\n", m_stride);
|
||||||
|
|
||||||
// Not enough left free room. Just go back at the beginning
|
// Not enough left free room. Just go back at the beginning
|
||||||
m_start = 0;
|
m_start = 0;
|
||||||
// Orphan the buffer to avoid synchronization
|
// Orphan the buffer to avoid synchronization
|
||||||
|
@ -91,8 +96,9 @@ class GSBufferOGL {
|
||||||
{
|
{
|
||||||
void* dst;
|
void* dst;
|
||||||
if (Map(&dst, count)) {
|
if (Map(&dst, count)) {
|
||||||
// FIXME which one to use
|
// FIXME which one to use. Note dst doesn't have any alignment guarantee
|
||||||
// GSVector4i::storent(dst, src, m_count * m_stride);
|
// because it depends of the offset
|
||||||
|
//GSVector4i::storent(dst, src, m_count * m_stride);
|
||||||
memcpy(dst, src, m_stride*m_count);
|
memcpy(dst, src, m_stride*m_count);
|
||||||
Unmap();
|
Unmap();
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,6 +82,10 @@ void GSWndGL::PopulateGlFunction()
|
||||||
*(void**)&(gl_VertexAttribIPointer) = GetProcAddress("glVertexAttribIPointer");
|
*(void**)&(gl_VertexAttribIPointer) = GetProcAddress("glVertexAttribIPointer");
|
||||||
*(void**)&(gl_VertexAttribPointer) = GetProcAddress("glVertexAttribPointer");
|
*(void**)&(gl_VertexAttribPointer) = GetProcAddress("glVertexAttribPointer");
|
||||||
*(void**)&(gl_BufferSubData) = GetProcAddress("glBufferSubData");
|
*(void**)&(gl_BufferSubData) = GetProcAddress("glBufferSubData");
|
||||||
|
*(void**)&(gl_FenceSync) = GetProcAddress("glFenceSync");
|
||||||
|
*(void**)&(gl_DeleteSync) = GetProcAddress("glDeleteSync");
|
||||||
|
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
|
||||||
|
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
|
||||||
// GL4.0
|
// GL4.0
|
||||||
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv");
|
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv");
|
||||||
// GL4.1
|
// GL4.1
|
||||||
|
|
|
@ -144,8 +144,7 @@ bool GSWndOGL::Attach(void* handle, bool managed)
|
||||||
|
|
||||||
CheckContext();
|
CheckContext();
|
||||||
|
|
||||||
m_swapinterval = (PFNGLXSWAPINTERVALMESAPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalMESA");
|
m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT");
|
||||||
//PFNGLXSWAPINTERVALMESAPROC m_swapinterval = (PFNGLXSWAPINTERVALMESAPROC)glXGetProcAddress((const GLubyte*) "glXSwapInterval");
|
|
||||||
|
|
||||||
PopulateGlFunction();
|
PopulateGlFunction();
|
||||||
|
|
||||||
|
@ -192,6 +191,8 @@ bool GSWndOGL::Create(const string& title, int w, int h)
|
||||||
|
|
||||||
CheckContext();
|
CheckContext();
|
||||||
|
|
||||||
|
m_swapinterval = (PFNGLXSWAPINTERVALEXTPROC)glXGetProcAddress((const GLubyte*) "glXSwapIntervalEXT");
|
||||||
|
|
||||||
PopulateGlFunction();
|
PopulateGlFunction();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -258,7 +259,7 @@ void GSWndOGL::SetVSync(bool enable)
|
||||||
// m_swapinterval uses an integer as parameter
|
// m_swapinterval uses an integer as parameter
|
||||||
// 0 -> disable vsync
|
// 0 -> disable vsync
|
||||||
// n -> wait n frame
|
// n -> wait n frame
|
||||||
if (m_swapinterval) m_swapinterval((int)enable);
|
if (m_swapinterval) m_swapinterval(m_NativeDisplay, m_NativeWindow, (int)enable);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSWndOGL::Flip()
|
void GSWndOGL::Flip()
|
||||||
|
|
|
@ -31,7 +31,7 @@ class GSWndOGL : public GSWndGL
|
||||||
Display* m_NativeDisplay;
|
Display* m_NativeDisplay;
|
||||||
GLXContext m_context;
|
GLXContext m_context;
|
||||||
|
|
||||||
PFNGLXSWAPINTERVALMESAPROC m_swapinterval;
|
PFNGLXSWAPINTERVALEXTPROC m_swapinterval;
|
||||||
|
|
||||||
void CreateContext(int major, int minor);
|
void CreateContext(int major, int minor);
|
||||||
void CheckContext();
|
void CheckContext();
|
||||||
|
|
Loading…
Reference in New Issue