gsdx-ogl: improve texture uploading

Previously, we copied the full pitch of every line into the PBO and told the
DMA to use only the first valid bytes of each line.

Now we copy only the useful data to the PBO. This reduces both the amount of data copied and the PBO memory requirement.

It seems a bit faster at native resolution.
This commit is contained in:
Gregory Hainaut 2015-05-11 16:29:09 +02:00
parent 4e2e9aa56c
commit f37f3cb3cf
4 changed files with 11 additions and 22 deletions

View File

@ -1673,8 +1673,7 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer)
}
#ifdef ENABLE_OGL_DEBUG_MEM_BW
total_frame_nb *= 1024;
fprintf(stderr, "memory bandwith. T: %f KB/f. RT: %f KB/f. V: %f KB/f. U: %f KB/f\n",
(float)g_texture_upload_byte/(float)total_frame_nb,
fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n",
(float)g_real_texture_upload_byte/(float)total_frame_nb,
(float)g_vertex_upload_byte/(float)total_frame_nb,
(float)g_uniform_upload_byte/(float)total_frame_nb

View File

@ -31,7 +31,6 @@
static uint32 g_draw_count = 0;
// TODO port those value into PerfMon API
#ifdef ENABLE_OGL_DEBUG_MEM_BW
uint64 g_texture_upload_byte = 0;
uint64 g_real_texture_upload_byte = 0;
uint64 g_vertex_upload_byte = 0;
uint64 g_uniform_upload_byte = 0;

View File

@ -30,7 +30,6 @@
#include "GLState.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern uint64 g_texture_upload_byte;
extern uint64 g_real_texture_upload_byte;
extern uint64 g_vertex_upload_byte;
#endif

View File

@ -25,7 +25,6 @@
#include "GLState.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern uint64 g_texture_upload_byte;
extern uint64 g_real_texture_upload_byte;
#endif
@ -271,30 +270,23 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch)
}
char* src = (char*)data;
char* map = PboPool::Map(r.height() * pitch);
uint32 row_byte = r.width() << m_int_shift;
uint32 map_size = r.height() * row_byte;
char* map = PboPool::Map(map_size);
#ifdef ENABLE_OGL_DEBUG_MEM_BW
// Note: pitch is the line size that will be copied into the PBO
// pitch >> m_int_shift is the line size that will be actually dma-ed into the GPU
g_texture_upload_byte += pitch * r.height();
g_real_texture_upload_byte += (r.width() * r.height()) << m_int_shift;
g_real_texture_upload_byte += row_byte * r.height();
#endif
memcpy(map, src, pitch*r.height());
// Note: row_byte != pitch
for (int h = 0; h < r.height(); h++) {
memcpy(map, src, row_byte);
map += row_byte;
src += pitch;
}
PboPool::Unmap();
// Note: reduce noise for gl retracers
// It might introduce bug after an emulator pause so always set it in standard mode
if (GLLoader::in_replayer) {
static int unpack_row_length = 0;
if (unpack_row_length != (pitch >> m_int_shift)) {
unpack_row_length = pitch >> m_int_shift;
glPixelStorei(GL_UNPACK_ROW_LENGTH, unpack_row_length);
}
} else {
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
}
gl_TextureSubImage2D(m_texture_id, GL_TEX_LEVEL_0, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
// FIXME OGL4: investigate, only 1 unpack buffer always bound