(GLSL) Avoid memcpy calls - good for about a +1fps speedup with the simplified ribbon on PowerPC Mac
This commit is contained in:
neville 2016-11-10 18:35:47 +01:00
parent 25c46aa7b2
commit 12804431db
2 changed files with 55 additions and 24 deletions

View File

@ -725,7 +725,7 @@ static void *gl_glsl_init(void *data, const char *path)
config_file_t *conf = NULL; config_file_t *conf = NULL;
const char *stock_vertex = NULL; const char *stock_vertex = NULL;
const char *stock_fragment = NULL; const char *stock_fragment = NULL;
glsl_shader_data_t *glsl = (glsl_shader_data_t*) glsl_shader_data_t *glsl = (glsl_shader_data_t*)
calloc(1, sizeof(glsl_shader_data_t)); calloc(1, sizeof(glsl_shader_data_t));
if (!glsl) if (!glsl)
@ -991,8 +991,8 @@ static void gl_glsl_set_uniform_parameter(
struct uniform_info *param, struct uniform_info *param,
void *uniform_data) void *uniform_data)
{ {
GLint location = 0; GLint location = 0;
glsl_shader_data_t *glsl = (glsl_shader_data_t*)data; glsl_shader_data_t *glsl = (glsl_shader_data_t*)data;
if (!param) if (!param)
return; return;
@ -1052,15 +1052,16 @@ static void gl_glsl_set_params(void *data, void *shader_data,
GLfloat buffer[512]; GLfloat buffer[512];
struct glsl_attrib attribs[32]; struct glsl_attrib attribs[32];
float input_size[2], output_size[2], texture_size[2]; float input_size[2], output_size[2], texture_size[2];
unsigned texunit = 1; unsigned texunit = 1;
const struct shader_uniforms *uni = NULL; const struct shader_uniforms *uni = NULL;
size_t size = 0, attribs_size = 0; size_t size = 0;
const struct video_tex_info *info = (const struct video_tex_info*)_info; size_t attribs_size = 0;
const struct video_tex_info *prev_info = (const struct video_tex_info*)_prev_info; const struct video_tex_info *info = (const struct video_tex_info*)_info;
const struct video_tex_info *prev_info = (const struct video_tex_info*)_prev_info;
const struct video_tex_info *feedback_info = (const struct video_tex_info*)_feedback_info; const struct video_tex_info *feedback_info = (const struct video_tex_info*)_feedback_info;
const struct video_tex_info *fbo_info = (const struct video_tex_info*)_fbo_info; const struct video_tex_info *fbo_info = (const struct video_tex_info*)_fbo_info;
struct glsl_attrib *attr = (struct glsl_attrib*)attribs; struct glsl_attrib *attr = (struct glsl_attrib*)attribs;
glsl_shader_data_t *glsl = (glsl_shader_data_t*)shader_data; glsl_shader_data_t *glsl = (glsl_shader_data_t*)shader_data;
if (!glsl) if (!glsl)
return; return;
@ -1139,7 +1140,14 @@ static void gl_glsl_set_params(void *data, void *shader_data,
attribs_size++; attribs_size++;
attr++; attr++;
memcpy(buffer + size, info->coord, 8 * sizeof(GLfloat)); buffer[size ] = info->coord[0];
buffer[size + 1] = info->coord[1];
buffer[size + 2] = info->coord[2];
buffer[size + 3] = info->coord[3];
buffer[size + 4] = info->coord[4];
buffer[size + 5] = info->coord[5];
buffer[size + 6] = info->coord[6];
buffer[size + 7] = info->coord[7];
size += 8; size += 8;
} }
@ -1168,7 +1176,14 @@ static void gl_glsl_set_params(void *data, void *shader_data,
attribs_size++; attribs_size++;
attr++; attr++;
memcpy(buffer + size, feedback_info->coord, 8 * sizeof(GLfloat)); buffer[size ] = feedback_info->coord[0];
buffer[size + 1] = feedback_info->coord[1];
buffer[size + 2] = feedback_info->coord[2];
buffer[size + 3] = feedback_info->coord[3];
buffer[size + 4] = feedback_info->coord[4];
buffer[size + 5] = feedback_info->coord[5];
buffer[size + 6] = feedback_info->coord[6];
buffer[size + 7] = feedback_info->coord[7];
size += 8; size += 8;
} }
@ -1197,7 +1212,14 @@ static void gl_glsl_set_params(void *data, void *shader_data,
attribs_size++; attribs_size++;
attr++; attr++;
memcpy(buffer + size, fbo_info[i].coord, 8 * sizeof(GLfloat)); buffer[size ] = fbo_info[i].coord[0];
buffer[size + 1] = fbo_info[i].coord[1];
buffer[size + 2] = fbo_info[i].coord[2];
buffer[size + 3] = fbo_info[i].coord[3];
buffer[size + 4] = fbo_info[i].coord[4];
buffer[size + 5] = fbo_info[i].coord[5];
buffer[size + 6] = fbo_info[i].coord[6];
buffer[size + 7] = fbo_info[i].coord[7];
size += 8; size += 8;
} }
} }
@ -1230,7 +1252,14 @@ static void gl_glsl_set_params(void *data, void *shader_data,
attribs_size++; attribs_size++;
attr++; attr++;
memcpy(buffer + size, prev_info[i].coord, 8 * sizeof(GLfloat)); buffer[size ] = prev_info[i].coord[0];
buffer[size + 1] = prev_info[i].coord[1];
buffer[size + 2] = prev_info[i].coord[2];
buffer[size + 3] = prev_info[i].coord[3];
buffer[size + 4] = prev_info[i].coord[4];
buffer[size + 5] = prev_info[i].coord[5];
buffer[size + 6] = prev_info[i].coord[6];
buffer[size + 7] = prev_info[i].coord[7];
size += 8; size += 8;
} }
} }
@ -1301,13 +1330,14 @@ fallback:
#define gl_glsl_set_coord_array(attr, coord1, coord2, coords, size, multiplier) \ #define gl_glsl_set_coord_array(attr, coord1, coord2, coords, size, multiplier) \
{ \ { \
unsigned y; \
attr->loc = coord1; \ attr->loc = coord1; \
attr->size = multiplier; \ attr->size = multiplier; \
attr->offset = size * sizeof(GLfloat); \ attr->offset = size * sizeof(GLfloat); \
attribs_size++; \ attribs_size++; \
attr++; \ attr++; \
memcpy(buffer + size, coord2, \ for (y = 0; y < (multiplier * coords->vertices); y++) \
multiplier * coords->vertices * sizeof(GLfloat)); \ buffer[y + size] = coord2[y]; \
size += multiplier * coords->vertices; \ size += multiplier * coords->vertices; \
} }
@ -1317,10 +1347,11 @@ static bool gl_glsl_set_coords(void *handle_data, void *shader_data, const struc
GLfloat short_buffer[4 * (2 + 2 + 4 + 2)]; GLfloat short_buffer[4 * (2 + 2 + 4 + 2)];
GLfloat *buffer; GLfloat *buffer;
struct glsl_attrib attribs[4]; struct glsl_attrib attribs[4];
size_t attribs_size = 0, size = 0; size_t attribs_size = 0;
struct glsl_attrib *attr = NULL; size_t size = 0;
struct glsl_attrib *attr = NULL;
const struct shader_uniforms *uni = NULL; const struct shader_uniforms *uni = NULL;
glsl_shader_data_t *glsl = (glsl_shader_data_t*)shader_data; glsl_shader_data_t *glsl = (glsl_shader_data_t*)shader_data;
if (!glsl || !glsl->shader->modern || !coords) if (!glsl || !glsl->shader->modern || !coords)
goto fallback; goto fallback;

View File

@ -282,7 +282,7 @@
ONLY_ACTIVE_ARCH = YES; ONLY_ACTIVE_ARCH = YES;
OTHER_CFLAGS = ( OTHER_CFLAGS = (
"-DHAVE_GRIFFIN", "-DHAVE_GRIFFIN",
"-DHAVE_UPDATE_ASSETS", "-DHAVE_UPDATE_ASSETS",
"-DHAVE_LANGEXTRA", "-DHAVE_LANGEXTRA",
"-DHAVE_CHEEVOS", "-DHAVE_CHEEVOS",
"-DHAVE_STB_FONT", "-DHAVE_STB_FONT",
@ -306,7 +306,7 @@
"-DHAVE_COCOA", "-DHAVE_COCOA",
"-DHAVE_MAIN", "-DHAVE_MAIN",
"-DSINC_LOWER_QUALITY", "-DSINC_LOWER_QUALITY",
"-DHAVE_NETWORKGAMEPAD", "-DHAVE_NETWORKGAMEPAD",
"-DHAVE_NETWORKING", "-DHAVE_NETWORKING",
"-DRARCH_INTERNAL", "-DRARCH_INTERNAL",
"-DHAVE_THREADS", "-DHAVE_THREADS",
@ -355,7 +355,7 @@
"-DNS_BLOCK_ASSERTIONS=1", "-DNS_BLOCK_ASSERTIONS=1",
"-DNDEBUG", "-DNDEBUG",
"-DHAVE_GRIFFIN", "-DHAVE_GRIFFIN",
"-DHAVE_UPDATE_ASSETS", "-DHAVE_UPDATE_ASSETS",
"-DHAVE_LANGEXTRA", "-DHAVE_LANGEXTRA",
"-DHAVE_CHEEVOS", "-DHAVE_CHEEVOS",
"-DHAVE_STB_FONT", "-DHAVE_STB_FONT",
@ -379,7 +379,7 @@
"-DHAVE_COCOA", "-DHAVE_COCOA",
"-DHAVE_MAIN", "-DHAVE_MAIN",
"-DSINC_LOWER_QUALITY", "-DSINC_LOWER_QUALITY",
"-DHAVE_NETWORKGAMEPAD", "-DHAVE_NETWORKGAMEPAD",
"-DHAVE_NETWORKING", "-DHAVE_NETWORKING",
"-DRARCH_INTERNAL", "-DRARCH_INTERNAL",
"-DHAVE_THREADS", "-DHAVE_THREADS",