From feb3bd079449d19f6ecc266c3ffd957b5d3c8cb1 Mon Sep 17 00:00:00 2001 From: jdgleaver Date: Fri, 23 Oct 2020 23:43:23 +0100 Subject: [PATCH] Refactor Scale2x video filter (crash fix + slight performance improvement) --- gfx/video_filters/scale2x.c | 237 ++++++++++++++++++------------------ 1 file changed, 117 insertions(+), 120 deletions(-) diff --git a/gfx/video_filters/scale2x.c b/gfx/video_filters/scale2x.c index e1125be4bc..ecba9d35a8 100644 --- a/gfx/video_filters/scale2x.c +++ b/gfx/video_filters/scale2x.c @@ -1,6 +1,6 @@ /* RetroArch - A frontend for libretro. * Copyright (C) 2010-2014 - Hans-Kristian Arntzen - * Copyright (C) 2011-2017 - Daniel De Matteis + * Copyright (C) 2011-2018 - Daniel De Matteis * * RetroArch is free software: you can redistribute it and/or modify it under the terms * of the GNU General Public License as published by the Free Software Found- @@ -18,6 +18,7 @@ #include "softfilter.h" #include +#include #ifdef RARCH_INTERNAL #define softfilter_get_implementation scale2x_get_implementation @@ -25,8 +26,6 @@ #define filter_data scale2x_filter_data #endif -#define SCALE2X_SCALE 2 - struct softfilter_thread_data { void *out_data; @@ -47,67 +46,6 @@ struct filter_data unsigned in_fmt; }; -#define SCALE2X_GENERIC(typename_t, width, height, first, last, src, src_stride, dst, dst_stride, out0, out1) \ - for (y = 0; y < height; ++y) \ - { \ - const int prevline = ((y == 0) && first) ? 0 : src_stride; \ - const int nextline = ((y == height - 1) && last) ? 0 : src_stride; \ - \ - for (x = 0; x < width; ++x) \ - { \ - const typename_t A = *(src - prevline); \ - const typename_t B = (x > 0) ? *(src - 1) : *src; \ - const typename_t C = *src; \ - const typename_t D = (x < width - 1) ? *(src + 1) : *src; \ - const typename_t E = *(src++ + nextline); \ - \ - if (A != E && B != D) \ - { \ - *out0++ = (A == B ? A : C); \ - *out0++ = (A == D ? A : C); \ - *out1++ = (E == B ? E : C); \ - *out1++ = (E == D ? E : C); \ - } \ - else \ - { \ - *out0++ = C; \ - *out0++ = C; \ - *out1++ = C; \ - *out1++ = C; \ - } \ - } \ - \ - src += src_stride - width; \ - out0 += dst_stride + dst_stride - (width * SCALE2X_SCALE); \ - out1 += dst_stride + dst_stride - (width * SCALE2X_SCALE); \ - } - -static void scale2x_generic_rgb565(unsigned width, unsigned height, - int first, int last, - const uint16_t *src, unsigned src_stride, - uint16_t *dst, unsigned dst_stride) -{ - unsigned x, y; - uint16_t *out0, *out1; - out0 = (uint16_t*)dst; - out1 = (uint16_t*)(dst + dst_stride); - SCALE2X_GENERIC(uint16_t, width, height, first, last, - src, src_stride, dst, dst_stride, out0, out1); -} - -static void scale2x_generic_xrgb8888(unsigned width, unsigned height, - int first, int last, - const uint32_t *src, unsigned src_stride, - uint32_t *dst, unsigned dst_stride) -{ - unsigned x, y; - uint32_t *out0 = (uint32_t*)dst; - uint32_t *out1 = (uint32_t*)(dst + dst_stride); - - SCALE2X_GENERIC(uint32_t, width, height, first, last, - src, src_stride, dst, dst_stride, out0, out1); -} - static unsigned scale2x_generic_input_fmts(void) { return SOFTFILTER_FMT_XRGB8888 | SOFTFILTER_FMT_RGB565; @@ -133,14 +71,16 @@ static void *scale2x_generic_create(const struct softfilter_config *config, (void)simd; (void)config; (void)userdata; - if (!filt) + + if (!filt) { return NULL; - filt->workers = (struct softfilter_thread_data*) - calloc(threads, sizeof(struct softfilter_thread_data)); + } + /* Apparently the code is not thread-safe, + * so force single threaded operation... */ + filt->workers = (struct softfilter_thread_data*)calloc(1, sizeof(struct softfilter_thread_data)); filt->threads = 1; filt->in_fmt = in_fmt; - if (!filt->workers) - { + if (!filt->workers) { free(filt); return NULL; } @@ -151,51 +91,114 @@ static void scale2x_generic_output(void *data, unsigned *out_width, unsigned *out_height, unsigned width, unsigned height) { - *out_width = width * SCALE2X_SCALE; - *out_height = height * SCALE2X_SCALE; + *out_width = width << 1; + *out_height = height << 1; } static void scale2x_generic_destroy(void *data) { struct filter_data *filt = (struct filter_data*)data; - - if (!filt) + if (!filt) { return; - + } free(filt->workers); free(filt); } static void scale2x_work_cb_xrgb8888(void *data, void *thread_data) { - struct softfilter_thread_data *thr = - (struct softfilter_thread_data*)thread_data; - const uint32_t *input = (const uint32_t*)thr->in_data; - uint32_t *output = (uint32_t*)thr->out_data; - unsigned width = thr->width; - unsigned height = thr->height; + struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data; + uint32_t in_stride = (uint32_t)(thr->in_pitch >> 2); + uint32_t out_stride = (uint32_t)(thr->out_pitch >> 2); + const uint32_t *input = (const uint32_t*)thr->in_data; + uint32_t *output0 = (uint32_t*)thr->out_data; + uint32_t *output1 = (uint32_t*)thr->out_data + out_stride; + uint16_t x, y; - scale2x_generic_xrgb8888(width, height, - thr->first, thr->last, input, - (unsigned)(thr->in_pitch / SOFTFILTER_BPP_XRGB8888), - output, - (unsigned)(thr->out_pitch / SOFTFILTER_BPP_XRGB8888)); + for (y = 0; y < thr->height; y++) + { + /* Determine offsets of previous/next source lines */ + uint32_t line_prev = (y == 0) ? 0 : in_stride; + uint32_t line_next = (y == thr->height - 1) ? 0 : in_stride; + + for (x = 0; x < thr->width; x++) + { + /* Get sample points */ + uint32_t A = *(input - line_prev); + uint32_t B = (x > 0) ? *(input - 1) : *input; + uint32_t C = *input; + uint32_t D = (x < thr->width - 1) ? *(input + 1) : *input; + uint32_t E = *(input++ + line_next); + + /* Apply pixel expansion algorithm */ + if (A != E && B != D) + { + *output0++ = (A == B ? A : C); + *output0++ = (A == D ? A : C); + *output1++ = (E == B ? E : C); + *output1++ = (E == D ? E : C); + } + else + { + *output0++ = C; + *output0++ = C; + *output1++ = C; + *output1++ = C; + } + } + + input += in_stride - thr->width; + output0 += (out_stride << 1) - (thr->width << 1); + output1 += (out_stride << 1) - (thr->width << 1); + } } static void scale2x_work_cb_rgb565(void *data, void *thread_data) { - struct softfilter_thread_data *thr = - (struct softfilter_thread_data*)thread_data; - const uint16_t *input = (const uint16_t*)thr->in_data; - uint16_t *output = (uint16_t*)thr->out_data; - unsigned width = thr->width; - unsigned height = thr->height; + struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data; + uint32_t in_stride = (uint32_t)(thr->in_pitch >> 1); + uint32_t out_stride = (uint32_t)(thr->out_pitch >> 1); + const uint16_t *input = (const uint16_t*)thr->in_data; + uint16_t *output0 = (uint16_t*)thr->out_data; + uint16_t *output1 = (uint16_t*)thr->out_data + out_stride; + uint16_t x, y; - scale2x_generic_rgb565(width, height, - thr->first, thr->last, input, - (unsigned)(thr->in_pitch / SOFTFILTER_BPP_RGB565), - output, - (unsigned)(thr->out_pitch / SOFTFILTER_BPP_RGB565)); + for (y = 0; y < thr->height; y++) + { + /* Determine offsets of previous/next source lines */ + uint32_t line_prev = (y == 0) ? 0 : in_stride; + uint32_t line_next = (y == thr->height - 1) ? 0 : in_stride; + + for (x = 0; x < thr->width; x++) + { + /* Get sample points */ + uint16_t A = *(input - line_prev); + uint16_t B = (x > 0) ? *(input - 1) : *input; + uint16_t C = *input; + uint16_t D = (x < thr->width - 1) ? *(input + 1) : *input; + uint16_t E = *(input++ + line_next); + + /* Apply pixel expansion algorithm */ + if (A != E && B != D) + { + *output0++ = (A == B ? A : C); + *output0++ = (A == D ? A : C); + *output1++ = (E == B ? E : C); + *output1++ = (E == D ? E : C); + } + else + { + *output0++ = C; + *output0++ = C; + *output1++ = C; + *output1++ = C; + } + } + + input += in_stride - thr->width; + output0 += (out_stride << 1) - (thr->width << 1); + output1 += (out_stride << 1) - (thr->width << 1); + } } static void scale2x_generic_packets(void *data, @@ -203,34 +206,27 @@ static void scale2x_generic_packets(void *data, void *output, size_t output_stride, const void *input, unsigned width, unsigned height, size_t input_stride) { + /* We are guaranteed single threaded operation + * (filt->threads = 1) so we don't need to loop + * over threads and can cull some code. This only + * makes the tiniest performance difference, but + * every little helps when running on an o3DS... */ struct filter_data *filt = (struct filter_data*)data; - unsigned i; - for (i = 0; i < filt->threads; i++) - { - struct softfilter_thread_data *thr = - (struct softfilter_thread_data*)&filt->workers[i]; + struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0]; - unsigned y_start = (height * i) / filt->threads; - unsigned y_end = (height * (i + 1)) / filt->threads; - thr->out_data = (uint8_t*)output + y_start * - SCALE2X_SCALE * output_stride; - thr->in_data = (const uint8_t*)input + y_start * input_stride; - thr->out_pitch = output_stride; - thr->in_pitch = input_stride; - thr->width = width; - thr->height = y_end - y_start; + thr->out_data = (uint8_t*)output; + thr->in_data = (const uint8_t*)input; + thr->out_pitch = output_stride; + thr->in_pitch = input_stride; + thr->width = width; + thr->height = height; - /* Workers need to know if they can access pixels - * outside their given buffer. */ - thr->first = y_start; - thr->last = y_end == height; - - if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888) - packets[i].work = scale2x_work_cb_xrgb8888; - else if (filt->in_fmt == SOFTFILTER_FMT_RGB565) - packets[i].work = scale2x_work_cb_rgb565; - packets[i].thread_data = thr; + if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888) { + packets[0].work = scale2x_work_cb_xrgb8888; + } else if (filt->in_fmt == SOFTFILTER_FMT_RGB565) { + packets[0].work = scale2x_work_cb_rgb565; } + packets[0].thread_data = thr; } static const struct softfilter_implementation scale2x_generic = { @@ -243,6 +239,7 @@ static const struct softfilter_implementation scale2x_generic = { scale2x_generic_threads, scale2x_generic_output, scale2x_generic_packets, + SOFTFILTER_API_VERSION, "Scale2x", "scale2x",