Add the Upscale1.66x_fast software video filter (RGB565 only; every complete
3x3 input block is expanded to a 5x5 output block).

NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. The
indentation of unchanged context lines is reconstructed and should be checked
against the tree before applying.

diff --git a/Makefile.common b/Makefile.common
index 181e74cf7c..ee4c445017 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -2485,6 +2485,7 @@ ifeq ($(HAVE_STATIC_VIDEO_FILTERS), 1)
           gfx/video_filters/dot_matrix_3x.o \
           gfx/video_filters/dot_matrix_4x.o \
           gfx/video_filters/upscale_1_5x.o \
+          gfx/video_filters/upscale_1_66x_fast.o \
           gfx/video_filters/upscale_256x_320x240.o \
           gfx/video_filters/picoscale_256x_320x240.o \
           gfx/video_filters/upscale_240x160_320x240.o \
diff --git a/gfx/video_filter.c b/gfx/video_filter.c
index 3d07d3d75a..eb0da93edc 100644
--- a/gfx/video_filter.c
+++ b/gfx/video_filter.c
@@ -287,6 +287,7 @@ extern const struct softfilter_implementation *gameboy4x_get_implementation(soft
 extern const struct softfilter_implementation *dot_matrix_3x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *dot_matrix_4x_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *upscale_1_5x_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *upscale_1_66x_fast_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *upscale_256x_320x240_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *picoscale_256x_320x240_get_implementation(softfilter_simd_mask_t simd);
 extern const struct softfilter_implementation *upscale_240x160_320x240_get_implementation(softfilter_simd_mask_t simd);
@@ -315,6 +316,7 @@ static const softfilter_get_implementation_t soft_plugs_builtin[] = {
    dot_matrix_3x_get_implementation,
    dot_matrix_4x_get_implementation,
    upscale_1_5x_get_implementation,
+   upscale_1_66x_fast_get_implementation,
    upscale_256x_320x240_get_implementation,
    picoscale_256x_320x240_get_implementation,
    upscale_240x160_320x240_get_implementation,
diff --git a/gfx/video_filters/Makefile b/gfx/video_filters/Makefile
index 7bd4a46709..6ca8585bd8 100644
--- a/gfx/video_filters/Makefile
+++ b/gfx/video_filters/Makefile
@@ -126,6 +126,7 @@ objects += blargg_ntsc_snes.$(DYLIB) \
 	dot_matrix_3x.$(DYLIB) \
 	dot_matrix_4x.$(DYLIB) \
 	upscale_1_5x.$(DYLIB) \
+	upscale_1_66x_fast.$(DYLIB) \
 	upscale_256x_320x240.$(DYLIB) \
 	picoscale_256x_320x240.$(DYLIB) \
 	upscale_240x160_320x240.$(DYLIB) \
diff --git a/gfx/video_filters/Upscale1.66x_fast.filt b/gfx/video_filters/Upscale1.66x_fast.filt
new file mode 100644
index 0000000000..06a2ae9841
--- /dev/null
+++ b/gfx/video_filters/Upscale1.66x_fast.filt
@@ -0,0 +1 @@
+filter = upscale_1_66x_fast
diff --git a/gfx/video_filters/upscale_1_66x_fast.c b/gfx/video_filters/upscale_1_66x_fast.c
new file mode 100644
index 0000000000..dd9b6ff229
--- /dev/null
+++ b/gfx/video_filters/upscale_1_66x_fast.c
@@ -0,0 +1,316 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ *  Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Compile: gcc -o upscale_1_66x_fast.so -shared upscale_1_66x_fast.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation upscale_1_66x_fast_get_implementation
+#define softfilter_thread_data upscale_1_66x_fast_softfilter_thread_data
+#define filter_data upscale_1_66x_fast_filter_data
+#endif
+
+/* Per-frame job description handed to the worker callback;
+ * filled in by upscale_1_66x_fast_generic_packets(). */
+struct softfilter_thread_data
+{
+   void *out_data;
+   const void *in_data;
+   size_t out_pitch;
+   size_t in_pitch;
+   unsigned colfmt;
+   unsigned width;
+   unsigned height;
+   int first;
+   int last;
+};
+
+/* Filter instance state allocated by upscale_1_66x_fast_generic_create(). */
+struct filter_data
+{
+   unsigned threads;
+   struct softfilter_thread_data *workers;
+   unsigned in_fmt;
+};
+
+/* Input pixel formats accepted by this filter: RGB565 only. */
+static unsigned upscale_1_66x_fast_generic_input_fmts(void)
+{
+   return SOFTFILTER_FMT_RGB565;
+}
+
+/* The output format is whatever the input format is. */
+static unsigned upscale_1_66x_fast_generic_output_fmts(unsigned input_fmts)
+{
+   return input_fmts;
+}
+
+/* Number of worker threads used by the filter instance 'data'. */
+static unsigned upscale_1_66x_fast_generic_threads(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   return filt->threads;
+}
+
+/* Allocates a filter instance together with its single worker slot.
+ * Returns NULL if either allocation fails. */
+static void *upscale_1_66x_fast_generic_create(const struct softfilter_config *config,
+      unsigned in_fmt, unsigned out_fmt,
+      unsigned max_width, unsigned max_height,
+      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+   struct filter_data *filt = (struct filter_data*)calloc(1, sizeof(*filt));
+   if (!filt)
+      return NULL;
+   if (!(filt->workers = (struct softfilter_thread_data*)calloc(1, sizeof(struct softfilter_thread_data))))
+   {
+      free(filt);
+      return NULL;
+   }
+   /* Apparently the code is not thread-safe,
+    * so force single threaded operation... */
+   filt->threads = 1;
+   filt->in_fmt  = in_fmt;
+   return filt;
+}
+
+/* Reports the output size for a given input size. Each complete 3x3
+ * input block becomes a 5x5 output block; columns/rows left over after
+ * dividing by three do not contribute to the output. */
+static void upscale_1_66x_fast_generic_output(void *data,
+      unsigned *out_width, unsigned *out_height,
+      unsigned width, unsigned height)
+{
+   *out_width  = (width / 3) * 5;
+   *out_height = (height / 3) * 5;
+}
+
+/* Frees a filter instance; passing NULL is a no-op. */
+static void upscale_1_66x_fast_generic_destroy(void *data)
+{
+   struct filter_data *filt = (struct filter_data*)data;
+   if (!filt)
+      return;
+   free(filt->workers);
+   free(filt);
+}
+
+/*
+ * Approximately bilinear scalers
+ *
+ * Copyright (C) 2019 hi-ban, Nebuleon
+ *
+ * This function and all auxiliary functions are free software; you can
+ * redistribute them and/or modify them under the terms of the GNU Lesser
+ * General Public License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * These functions are distributed in the hope that they will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* from RGB565 */
+#define cR(A) (((A) & 0xf800) >> 11)
+#define cG(A) (((A) & 0x7e0) >> 5)
+#define cB(A) ((A) & 0x1f)
+/* to RGB565 */
+#define Weight1_1(A, B)  ((((cR(A) + cR(B)) >> 1) & 0x1f) << 11 | (((cG(A) + cG(B)) >> 1) & 0x3f) << 5 | (((cB(A) + cB(B)) >> 1) & 0x1f))
+#define Weight1_2(A, B)  ((((cR(A) + (cR(B) << 1)) / 3) & 0x1f) << 11 | (((cG(A) + (cG(B) << 1)) / 3) & 0x3f) << 5 | (((cB(A) + (cB(B) << 1)) / 3) & 0x1f))
+#define Weight2_1(A, B)  ((((cR(B) + (cR(A) << 1)) / 3) & 0x1f) << 11 | (((cG(B) + (cG(A) << 1)) / 3) & 0x3f) << 5 | (((cB(B) + (cB(A) << 1)) / 3) & 0x1f))
+#define Weight1_3(A, B)  ((((cR(A) + (cR(B) * 3)) >> 2) & 0x1f) << 11 | (((cG(A) + (cG(B) * 3)) >> 2) & 0x3f) << 5 | (((cB(A) + (cB(B) * 3)) >> 2) & 0x1f))
+#define Weight3_1(A, B)  ((((cR(B) + (cR(A) * 3)) >> 2) & 0x1f) << 11 | (((cG(B) + (cG(A) * 3)) >> 2) & 0x3f) << 5 | (((cB(B) + (cB(A) * 3)) >> 2) & 0x1f))
+#define Weight1_4(A, B)  ((((cR(A) + (cR(B) << 2)) / 5) & 0x1f) << 11 | (((cG(A) + (cG(B) << 2)) / 5) & 0x3f) << 5 | (((cB(A) + (cB(B) << 2)) / 5) & 0x1f))
+#define Weight4_1(A, B)  ((((cR(B) + (cR(A) << 2)) / 5) & 0x1f) << 11 | (((cG(B) + (cG(A) << 2)) / 5) & 0x3f) << 5 | (((cB(B) + (cB(A) << 2)) / 5) & 0x1f))
+#define Weight2_3(A, B)  (((((cR(A) << 1) + (cR(B) * 3)) / 5) & 0x1f) << 11 | ((((cG(A) << 1) + (cG(B) * 3)) / 5) & 0x3f) << 5 | ((((cB(A) << 1) + (cB(B) * 3)) / 5) & 0x1f))
+#define Weight3_2(A, B)  (((((cR(B) << 1) + (cR(A) * 3)) / 5) & 0x1f) << 11 | ((((cG(B) << 1) + (cG(A) * 3)) / 5) & 0x3f) << 5 | ((((cB(B) << 1) + (cB(A) * 3)) / 5) & 0x1f))
+#define Weight1_1_1_1(A, B, C, D)  ((((cR(A) + cR(B) + cR(C) + cR(D)) >> 2) & 0x1f) << 11 | (((cG(A) + cG(B) + cG(C) + cG(D)) >> 2) & 0x3f) << 5 | (((cB(A) + cB(B) + cB(C) + cB(D)) >> 2) & 0x1f))
+
+/* Worker callback: expands every complete 3x3 block of RGB565 input
+ * pixels into a 5x5 output block (see the diagram in the body).
+ * 'data' is unused; 'thread_data' is the struct softfilter_thread_data
+ * describing the frame. */
+static void upscale_1_66x_fast_work_cb_rgb565(void *data, void *thread_data)
+{
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+   const uint16_t *input              = (const uint16_t*)thr->in_data;
+   uint16_t *output                   = (uint16_t*)thr->out_data;
+   /* Pitch is halved to get a stride counted in 16-bit pixels */
+   size_t in_stride                   = thr->in_pitch >> 1;
+   size_t out_stride                  = thr->out_pitch >> 1;
+   unsigned blocks_x                  = thr->width / 3;
+   unsigned blocks_y                  = thr->height / 3;
+   unsigned x, y;
+
+   uint16_t _1;
+   uint16_t _2;
+   uint16_t _3;
+   uint16_t _4;
+   uint16_t _5;
+   uint16_t _6;
+   uint16_t _7;
+   uint16_t _8;
+   uint16_t _9;
+
+   /* Before:
+    *    a b c
+    *    d e f
+    *    g h i
+    *
+    * After (parenthesis = average):
+    *    a      (aab)  b      (bcc)  c
+    *    (aad)  (...)  (bbe)  (...)  (ccf)
+    *    d      (dde)  e      (eff)  f
+    *    (dgg)  (...)  (ehh)  (...)  (fii)
+    *    g      (ggh)  h      (hii)  i
+    */
+
+   for (y = 0; y < blocks_y; y++)
+   {
+      uint16_t *out_ptr = output;
+      for (x = 0; x < blocks_x; x++)
+      {
+         const uint16_t *in_line_ptr = input + x*3;
+         uint16_t *out_line_ptr      = out_ptr;
+
+         _1 = *in_line_ptr;
+         _2 = *(in_line_ptr + 1);
+         _3 = *(in_line_ptr + 2);
+         in_line_ptr += in_stride;
+
+         _4 = *in_line_ptr;
+         _5 = *(in_line_ptr + 1);
+         _6 = *(in_line_ptr + 2);
+         in_line_ptr += in_stride;
+
+         _7 = *in_line_ptr;
+         _8 = *(in_line_ptr + 1);
+         _9 = *(in_line_ptr + 2);
+
+         /* Row 1 */
+         *out_line_ptr       = _1;
+         *(out_line_ptr + 1) = Weight2_1( _1, _2);
+         *(out_line_ptr + 2) = _2;
+         *(out_line_ptr + 3) = Weight1_2( _2, _3);
+         *(out_line_ptr + 4) = _3;
+         out_line_ptr += out_stride;
+
+         /* Row 2 */
+         *out_line_ptr       = Weight2_1( _1, _4);
+         *(out_line_ptr + 1) = Weight2_1(Weight2_1( _1, _2), Weight2_1( _4, _5));
+         *(out_line_ptr + 2) = Weight2_1( _2, _5);
+         *(out_line_ptr + 3) = Weight2_1(Weight1_2( _2, _3), Weight1_2( _5, _6));
+         *(out_line_ptr + 4) = Weight2_1( _3, _6);
+         out_line_ptr += out_stride;
+
+         /* Row 3 */
+         *out_line_ptr       = _4;
+         *(out_line_ptr + 1) = Weight2_1( _4, _5);
+         *(out_line_ptr + 2) = _5;
+         *(out_line_ptr + 3) = Weight1_2( _5, _6);
+         *(out_line_ptr + 4) = _6;
+         out_line_ptr += out_stride;
+
+         /* Row 4 */
+         *out_line_ptr       = Weight1_2( _4, _7);
+         *(out_line_ptr + 1) = Weight1_2(Weight2_1( _4, _5), Weight2_1( _7, _8));
+         *(out_line_ptr + 2) = Weight1_2( _5, _8);
+         *(out_line_ptr + 3) = Weight1_2(Weight1_2( _5, _6), Weight1_2( _8, _9));
+         *(out_line_ptr + 4) = Weight1_2( _6, _9);
+         out_line_ptr += out_stride;
+
+         /* Row 5 */
+         *out_line_ptr       = _7;
+         *(out_line_ptr + 1) = Weight2_1( _7, _8);
+         *(out_line_ptr + 2) = _8;
+         *(out_line_ptr + 3) = Weight1_2( _8, _9);
+         *(out_line_ptr + 4) = _9;
+
+         out_ptr += 5;
+      }
+
+      input  += in_stride * 3;
+      output += out_stride * 5;
+   }
+}
+
+/* Describes the work for one frame: records the buffers, pitches and
+ * dimensions in the single worker slot and publishes it as packets[0]. */
+static void upscale_1_66x_fast_generic_packets(void *data,
+      struct softfilter_work_packet *packets,
+      void *output, size_t output_stride,
+      const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+   /* We are guaranteed single threaded operation
+    * (filt->threads = 1) so we don't need to loop
+    * over threads and can cull some code. This only
+    * makes the tiniest performance difference, but
+    * every little helps when running on an o3DS... */
+   struct filter_data *filt           = (struct filter_data*)data;
+   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0];
+
+   thr->out_data  = (uint8_t*)output;
+   thr->in_data   = (const uint8_t*)input;
+   thr->out_pitch = output_stride;
+   thr->in_pitch  = input_stride;
+   thr->width     = width;
+   thr->height    = height;
+
+   /* NOTE(review): for any other in_fmt, packets[0].work is left as the
+    * caller provided it; only RGB565 is advertised as an input format. */
+   if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
+      packets[0].work = upscale_1_66x_fast_work_cb_rgb565;
+   packets[0].thread_data = thr;
+}
+
+/* Callback table exported through softfilter_get_implementation(). */
+static const struct softfilter_implementation upscale_1_66x_fast_generic = {
+   upscale_1_66x_fast_generic_input_fmts,
+   upscale_1_66x_fast_generic_output_fmts,
+
+   upscale_1_66x_fast_generic_create,
+   upscale_1_66x_fast_generic_destroy,
+
+   upscale_1_66x_fast_generic_threads,
+   upscale_1_66x_fast_generic_output,
+   upscale_1_66x_fast_generic_packets,
+
+   SOFTFILTER_API_VERSION,
+   "Upscale1.66x_fast",
+   "upscale_1_66x_fast",
+};
+
+/* Entry point: the same generic implementation is returned for any SIMD mask. */
+const struct softfilter_implementation *softfilter_get_implementation(
+      softfilter_simd_mask_t simd)
+{
+   return &upscale_1_66x_fast_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
+
diff --git a/griffin/griffin.c b/griffin/griffin.c
index a8aafe2ad0..30d83c1893 100644
--- a/griffin/griffin.c
+++ b/griffin/griffin.c
@@ -999,6 +999,7 @@ FILTERS
 #include "../gfx/video_filters/dot_matrix_3x.c"
 #include "../gfx/video_filters/dot_matrix_4x.c"
 #include "../gfx/video_filters/upscale_1_5x.c"
+#include "../gfx/video_filters/upscale_1_66x_fast.c"
 #include "../gfx/video_filters/upscale_256x_320x240.c"
 #include "../gfx/video_filters/picoscale_256x_320x240.c"
 #include "../gfx/video_filters/upscale_240x160_320x240.c"