diff --git a/Makefile.common b/Makefile.common
index b8b2c27784..fe1b5eb13f 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -2261,6 +2261,7 @@ ifeq ($(HAVE_STATIC_VIDEO_FILTERS), 1)
gfx/video_filters/normal2x.o \
gfx/video_filters/normal2x_width.o \
gfx/video_filters/normal2x_height.o \
+ gfx/video_filters/normal4x.o \
gfx/video_filters/scanline2x.o \
gfx/video_filters/grid2x.o \
gfx/video_filters/grid3x.o \
diff --git a/gfx/video_filter.c b/gfx/video_filter.c
index 4c92a127f7..0e45185895 100644
--- a/gfx/video_filter.c
+++ b/gfx/video_filter.c
@@ -282,6 +282,7 @@ extern const struct softfilter_implementation *scale2x_get_implementation(softfi
extern const struct softfilter_implementation *normal2x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *normal2x_width_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *normal2x_height_get_implementation(softfilter_simd_mask_t simd);
+extern const struct softfilter_implementation *normal4x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *scanline2x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *grid2x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *grid3x_get_implementation(softfilter_simd_mask_t simd);
@@ -304,6 +305,7 @@ static const softfilter_get_implementation_t soft_plugs_builtin[] = {
normal2x_get_implementation,
normal2x_width_get_implementation,
normal2x_height_get_implementation,
+ normal4x_get_implementation,
scanline2x_get_implementation,
grid2x_get_implementation,
grid3x_get_implementation,
diff --git a/gfx/video_filters/Makefile b/gfx/video_filters/Makefile
index 64fda8c7e5..9b3f246b78 100644
--- a/gfx/video_filters/Makefile
+++ b/gfx/video_filters/Makefile
@@ -70,7 +70,7 @@ endif
objects += blargg_ntsc_snes.$(DYLIB) phosphor2x.$(DYLIB) epx.$(DYLIB) lq2x.$(DYLIB) \
2xsai.$(DYLIB) super2xsai.$(DYLIB) supereagle.$(DYLIB) 2xbr.$(DYLIB) \
darken.$(DYLIB) scale2x.$(DYLIB) normal2x.$(DYLIB) \
- normal2x_width.$(DYLIB) normal2x_height.$(DYLIB) \
+ normal2x_width.$(DYLIB) normal2x_height.$(DYLIB) normal4x.$(DYLIB) \
scanline2x.$(DYLIB) grid2x.$(DYLIB) grid3x.$(DYLIB) \
gameboy3x.$(DYLIB) gameboy4x.$(DYLIB) \
dot_matrix_3x.$(DYLIB) dot_matrix_4x.$(DYLIB)
diff --git a/gfx/video_filters/Normal4x.filt b/gfx/video_filters/Normal4x.filt
new file mode 100644
index 0000000000..761508de55
--- /dev/null
+++ b/gfx/video_filters/Normal4x.filt
@@ -0,0 +1 @@
+filter = normal4x
diff --git a/gfx/video_filters/normal4x.c b/gfx/video_filters/normal4x.c
new file mode 100644
index 0000000000..63750e5ebc
--- /dev/null
+++ b/gfx/video_filters/normal4x.c
@@ -0,0 +1,255 @@
+/* RetroArch - A frontend for libretro.
+ * Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ * Copyright (C) 2011-2018 - Daniel De Matteis
+ *
+ * RetroArch is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with RetroArch.
+ * If not, see .
+ */
+
+/* Compile: gcc -o normal4x.so -shared normal4x.c -std=c99 -O3 -Wall -pedantic -fPIC */
+
+#include "softfilter.h"
+#include
+#include
+
+#ifdef RARCH_INTERNAL
+#define softfilter_get_implementation normal4x_get_implementation
+#define softfilter_thread_data normal4x_softfilter_thread_data
+#define filter_data normal4x_filter_data
+#endif
+
+struct softfilter_thread_data
+{
+ void *out_data;
+ const void *in_data;
+ size_t out_pitch;
+ size_t in_pitch;
+ unsigned colfmt;
+ unsigned width;
+ unsigned height;
+ int first;
+ int last;
+};
+
+struct filter_data
+{
+ unsigned threads;
+ struct softfilter_thread_data *workers;
+ unsigned in_fmt;
+};
+
+static unsigned normal4x_generic_input_fmts(void)
+{
+ return SOFTFILTER_FMT_XRGB8888 | SOFTFILTER_FMT_RGB565;
+}
+
+static unsigned normal4x_generic_output_fmts(unsigned input_fmts)
+{
+ return input_fmts;
+}
+
+static unsigned normal4x_generic_threads(void *data)
+{
+ struct filter_data *filt = (struct filter_data*)data;
+ return filt->threads;
+}
+
+static void *normal4x_generic_create(const struct softfilter_config *config,
+ unsigned in_fmt, unsigned out_fmt,
+ unsigned max_width, unsigned max_height,
+ unsigned threads, softfilter_simd_mask_t simd, void *userdata)
+{
+ struct filter_data *filt = (struct filter_data*)calloc(1, sizeof(*filt));
+ (void)simd;
+ (void)config;
+ (void)userdata;
+
+ if (!filt) {
+ return NULL;
+ }
+ /* Apparently the code is not thread-safe,
+ * so force single threaded operation... */
+ filt->workers = (struct softfilter_thread_data*)calloc(1, sizeof(struct softfilter_thread_data));
+ filt->threads = 1;
+ filt->in_fmt = in_fmt;
+ if (!filt->workers) {
+ free(filt);
+ return NULL;
+ }
+ return filt;
+}
+
+static void normal4x_generic_output(void *data,
+ unsigned *out_width, unsigned *out_height,
+ unsigned width, unsigned height)
+{
+ *out_width = width << 2;
+ *out_height = height << 2;
+}
+
+static void normal4x_generic_destroy(void *data)
+{
+ struct filter_data *filt = (struct filter_data*)data;
+ if (!filt) {
+ return;
+ }
+ free(filt->workers);
+ free(filt);
+}
+
+static void normal4x_work_cb_xrgb8888(void *data, void *thread_data)
+{
+ struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+ const uint32_t *input = (const uint32_t*)thr->in_data;
+ uint32_t *output = (uint32_t*)thr->out_data;
+ uint32_t in_stride = (uint32_t)(thr->in_pitch >> 2);
+ uint32_t out_stride = (uint32_t)(thr->out_pitch >> 2);
+ uint32_t x, y;
+
+ for (y = 0; y < thr->height; ++y)
+ {
+ uint32_t *out_ptr = output;
+ for (x = 0; x < thr->width; ++x)
+ {
+ uint32_t *out_line_ptr = out_ptr;
+ uint32_t color = *(input + x);
+ uint32_t row_color[4];
+
+ row_color[0] = color;
+ row_color[1] = color;
+ row_color[2] = color;
+ row_color[3] = color;
+
+ /* Row 1 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 2 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 3 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 4 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+
+ out_ptr += 4;
+ }
+
+ input += in_stride;
+ output += out_stride << 2;
+ }
+}
+
+static void normal4x_work_cb_rgb565(void *data, void *thread_data)
+{
+ struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
+ const uint16_t *input = (const uint16_t*)thr->in_data;
+ uint16_t *output = (uint16_t*)thr->out_data;
+ uint16_t in_stride = (uint16_t)(thr->in_pitch >> 1);
+ uint16_t out_stride = (uint16_t)(thr->out_pitch >> 1);
+ uint16_t x, y;
+
+ for (y = 0; y < thr->height; ++y)
+ {
+ uint16_t *out_ptr = output;
+ for (x = 0; x < thr->width; ++x)
+ {
+ uint16_t *out_line_ptr = out_ptr;
+ uint16_t color = *(input + x);
+ uint16_t row_color[4];
+
+ row_color[0] = color;
+ row_color[1] = color;
+ row_color[2] = color;
+ row_color[3] = color;
+
+ /* Row 1 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 2 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 3 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+ out_line_ptr += out_stride;
+
+ /* Row 4 */
+ memcpy(out_line_ptr, row_color, sizeof(row_color));
+
+ out_ptr += 4;
+ }
+
+ input += in_stride;
+ output += out_stride << 2;
+ }
+}
+
+static void normal4x_generic_packets(void *data,
+ struct softfilter_work_packet *packets,
+ void *output, size_t output_stride,
+ const void *input, unsigned width, unsigned height, size_t input_stride)
+{
+ /* We are guaranteed single threaded operation
+ * (filt->threads = 1) so we don't need to loop
+ * over threads and can cull some code. This only
+ * makes the tiniest performance difference, but
+ * every little helps when running on an o3DS... */
+ struct filter_data *filt = (struct filter_data*)data;
+ struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0];
+
+ thr->out_data = (uint8_t*)output;
+ thr->in_data = (const uint8_t*)input;
+ thr->out_pitch = output_stride;
+ thr->in_pitch = input_stride;
+ thr->width = width;
+ thr->height = height;
+
+ if (filt->in_fmt == SOFTFILTER_FMT_XRGB8888) {
+ packets[0].work = normal4x_work_cb_xrgb8888;
+ } else if (filt->in_fmt == SOFTFILTER_FMT_RGB565) {
+ packets[0].work = normal4x_work_cb_rgb565;
+ }
+ packets[0].thread_data = thr;
+}
+
+static const struct softfilter_implementation normal4x_generic = {
+ normal4x_generic_input_fmts,
+ normal4x_generic_output_fmts,
+
+ normal4x_generic_create,
+ normal4x_generic_destroy,
+
+ normal4x_generic_threads,
+ normal4x_generic_output,
+ normal4x_generic_packets,
+
+ SOFTFILTER_API_VERSION,
+ "Normal4x",
+ "normal4x",
+};
+
+const struct softfilter_implementation *softfilter_get_implementation(
+ softfilter_simd_mask_t simd)
+{
+ (void)simd;
+ return &normal4x_generic;
+}
+
+#ifdef RARCH_INTERNAL
+#undef softfilter_get_implementation
+#undef softfilter_thread_data
+#undef filter_data
+#endif
diff --git a/griffin/griffin.c b/griffin/griffin.c
index bc1828d53b..c2369c79ac 100644
--- a/griffin/griffin.c
+++ b/griffin/griffin.c
@@ -1003,6 +1003,7 @@ FILTERS
#include "../gfx/video_filters/normal2x.c"
#include "../gfx/video_filters/normal2x_width.c"
#include "../gfx/video_filters/normal2x_height.c"
+#include "../gfx/video_filters/normal4x.c"
#include "../gfx/video_filters/scanline2x.c"
#include "../gfx/video_filters/grid2x.c"
#include "../gfx/video_filters/grid3x.c"