Move code back

2025-07-15 23:35:37 +02:00 · 2025-07-15 23:35:37 +02:00 · d37068cfb8
parent 3162b6a438
commit d37068cfb8
9 changed files with 426 additions and 327 deletions
--- a/gfx/gfx_animation.c
+++ b/gfx/gfx_animation.c
@ -314,8 +314,6 @@ static void gfx_animation_ticker_loop(uint64_t idx,
      size_t *offset2, size_t *width2,
      size_t *offset3, size_t *width3)
 {
-   int offset2_val;
-   /* Calculate ticker period and phase */
   int ticker_period     = (int)(str_width + spacer_width);
   int phase             = idx % ticker_period;

@ -334,30 +332,42 @@ static void gfx_animation_ticker_loop(uint64_t idx,
    * all the string manipulation that has to happen afterwards...
    */

-   /* Initialize offsets and widths */
-   *offset1 = (phase < (int)str_width) ? (size_t)phase : 0;
-   *width1  = (size_t)((phase < (int)str_width) ? (str_width - phase) : 0);
+   /* String 1 */
+   int offset = 0;
+   int width  = (int)(str_width - phase);
+   if (width < 0)
+      width   = 0;
+   else if ((width > (int)max_width))
+      width   = (int)max_width;

-   /* Clamp width1 to max_width */
-   if (*width1 > max_width)
-      *width1 = max_width;
+   if (phase < (int)str_width)
+      offset  = phase;

-   /* Calculate offset2 and width2 */
-   offset2_val = (int)(phase - str_width);
-   *offset2    = (offset2_val < 0) ? 0 : (size_t)offset2_val;
+   *offset1   = offset;
+   *width1    = width;

-   *width2 = (size_t)(max_width - *width1);
-   if (*width2 > spacer_width)
-      *width2 = spacer_width;
-   *width2   -= *offset2;
+   /* String 2 */
+   offset     = (int)(phase - str_width);
+   if (offset < 0)
+      offset  = 0;
+   width      = (int)(max_width - *width1);
+   if (width > (int)spacer_width)
+      width   = (int)spacer_width;
+   width     -= offset;

-   /* Calculate width3 */
-   *width3 = (size_t)(max_width - (*width1 + *width2));
-   if (*width3 < 0)
-      *width3 = 0;
+   *offset2   = offset;
+   *width2    = width;

-   /* offset3 is always zero */
-   *offset3 = 0;
+   /* String 3 */
+   width      = (int)(max_width - (*width1 + *width2));
+   if (width < 0)
+      width   = 0;
+
+   /* Note: offset is always zero here so offset3 is
+    * unnecessary - but include it anyway to preserve
+    * symmetry... */
+   *offset3   = 0;
+   *width3    = width;
 }

 static unsigned get_ticker_smooth_generic_scroll_offset(
--- a/gfx/video_driver.c
+++ b/gfx/video_driver.c
@ -2113,71 +2113,73 @@ void video_viewport_get_scaled_aspect2(struct video_viewport *vp,
      float device_aspect, float desired_aspect)
 {
   settings_t *settings = config_get_ptr();
-   video_driver_state_t *video_st = &video_driver_st;
-   int x = 0, y = 0;
-   float vp_bias_x = settings->floats.video_vp_bias_x;
-   float vp_bias_y = settings->floats.video_vp_bias_y;
-
+   video_driver_state_t
+      *video_st         = &video_driver_st;
+   int x                = 0;
+   int y                = 0;
+   float vp_bias_x      = settings->floats.video_vp_bias_x;
+   float vp_bias_y      = settings->floats.video_vp_bias_y;
 #if defined(RARCH_MOBILE)
   if (vp_width < vp_height)
   {
-      vp_bias_x = settings->floats.video_vp_bias_portrait_x;
-      vp_bias_y = settings->floats.video_vp_bias_portrait_y;
+      vp_bias_x         = settings->floats.video_vp_bias_portrait_x;
+      vp_bias_y         = settings->floats.video_vp_bias_portrait_y;
   }
 #endif
-
   if (!y_down)
-      vp_bias_y = 1.0f - vp_bias_y;
+      vp_bias_y         = 1.0 - vp_bias_y;

   if (settings->uints.video_aspect_ratio_idx == ASPECT_RATIO_CUSTOM)
   {
-      video_viewport_t *custom_vp = &settings->video_vp_custom;
-      int padding_x = vp_width - custom_vp->width;
-      int padding_y = vp_height - custom_vp->height;
+      video_viewport_t
+         *custom_vp     = &settings->video_vp_custom;
+      int padding_x     = 0;
+      int padding_y     = 0;

-      x = custom_vp->x;
-      y = custom_vp->y;
+      x                 = custom_vp->x;
+      y                 = custom_vp->y;

      if (!y_down)
-         y = vp->full_height - (y + custom_vp->height);
-
-      /* Adjust padding directly without checking negative */
+         y              = vp->full_height - (y + custom_vp->height);
+      padding_x        += (vp_width - custom_vp->width);
      if (padding_x < 0)
-         padding_x = -padding_x;
+         padding_x     *= 2;
+      padding_y         = vp_height - custom_vp->height;
      if (padding_y < 0)
-         padding_y = -padding_y;
-
-      vp_width  = custom_vp->width;
-      vp_height = custom_vp->height;
-
-      x += (int)(padding_x * vp_bias_x);
-      y += (int)(padding_y * vp_bias_y);
+         padding_y     *= 2;
+      vp_width          = custom_vp->width;
+      vp_height         = custom_vp->height;
+      x                += padding_x * vp_bias_x;
+      y                += padding_y * vp_bias_y;
   }
   else
   {
      float delta;
-      float aspect_diff = fabsf(device_aspect - desired_aspect);

-      if (aspect_diff >= 0.0001f)
+      if (fabsf(device_aspect - desired_aspect) < 0.0001f)
      {
-         if (device_aspect > desired_aspect)
-         {
-            delta     = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f;
-            x        += (int)roundf(vp_width * (0.5f - delta) * vp_bias_x * 2.0f);
-            vp_width  = (unsigned)roundf(vp_width * delta * 2.0f);
-         }
-         else
-         {
-            delta     = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f;
-            y        += (int)roundf(vp_height * (0.5f - delta) * vp_bias_y * 2.0f);
-            vp_height = (unsigned)roundf(vp_height * delta * 2.0f);
-         }
+         /* If the aspect ratios of screen and desired aspect
+          * ratio are sufficiently equal (floating point stuff),
+          * assume they are actually equal.
+          */
+      }
+      else if (device_aspect > desired_aspect)
+      {
+         delta      = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f;
+         x         += (int)roundf(vp_width * ((0.5f - delta) * (vp_bias_x * 2.0f)));
+         vp_width   = (unsigned)roundf(2.0f * vp_width * delta);
+      }
+      else
+      {
+         delta      = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f;
+         y         += (int)roundf(vp_height * ((0.5f - delta) * (vp_bias_y * 2.0f)));
+         vp_height  = (unsigned)roundf(2.0f * vp_height * delta);
      }
   }

-   vp->x = x;
-   vp->y = y;
-   vp->width = vp_width;
+   vp->x      = x;
+   vp->y      = y;
+   vp->width  = vp_width;
   vp->height = vp_height;

   /* Statistics */
--- a/libretro-common/audio/conversion/float_to_s16.c
+++ b/libretro-common/audio/conversion/float_to_s16.c
@ -109,11 +109,13 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len)
      _mm_storeu_si128((__m128i *)s, packed); /* Then put the result in the output array */
   }

-   len              -= i;
+   len               = len - i;
   i                 = 0;
   /* If there are any stray samples at the end, we need to convert them
    * (maybe the original array didn't contain a multiple of 8 samples) */
 #elif defined(__ALTIVEC__)
+   int samples_in    = len;
+
   /* Unaligned loads/store is a bit expensive,
    * so we optimize for the good path (very likely). */
   if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0)
@ -128,11 +130,20 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len)
         vec_st(vec_packs(result0, result1), 0, s);
      }

-      len    -= i;
+      samples_in    -= i;
   }

+   len               = samples_in;
   i                 = 0;
 #elif defined(_MIPS_ARCH_ALLEGREX)
+#ifdef DEBUG
+   /* Make sure the buffers are 16 byte aligned, this should be
+    * the default behaviour of malloc in the PSPSDK.
+    * Assume alignment. */
+   retro_assert(((uintptr_t)in  & 0xf) == 0);
+   retro_assert(((uintptr_t)s & 0xf) == 0);
+#endif
+
   for (i = 0; i + 8 <= len; i += 8)
   {
      __asm__ (
--- a/libretro-common/audio/conversion/s16_to_float.c
+++ b/libretro-common/audio/conversion/s16_to_float.c
@ -111,9 +111,11 @@ void convert_s16_to_float(float *s,
      _mm_storeu_ps(s + 4, output_r);
   }

-   len    -= i;
+   len     = len - i;
   i       = 0;
 #elif defined(__ALTIVEC__)
+   size_t samples_in = len;
+
   /* Unaligned loads/store is a bit expensive, so we
    * optimize for the good path (very likely). */
   if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0)
@ -133,15 +135,23 @@ void convert_s16_to_float(float *s,
         vec_st(out_lo, 16, s);
      }

-      len -= i;
+      samples_in -= i;
   }

+   len     = samples_in;
   i       = 0;
 #endif

   gain   /= 0x8000;

 #if defined(_MIPS_ARCH_ALLEGREX)
+#ifdef DEBUG
+   /* Make sure the buffer is 16 byte aligned, this should be the
+    * default behaviour of malloc in the PSPSDK.
+    * Only the output buffer can be assumed to be 16-byte aligned. */
+   retro_assert(((uintptr_t)s & 0xf) == 0);
+#endif
+
   __asm__ (
         ".set    push                    \n"
         ".set    noreorder               \n"
--- a/libretro-common/audio/resampler/drivers/sinc_resampler.c
+++ b/libretro-common/audio/resampler/drivers/sinc_resampler.c
@ -103,43 +103,43 @@ void process_sinc_neon_asm(float *out, const float *left,
 */
 static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);
   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;
   while (frames)
   {
-      while (frames && re->time >= phases)
+      while (frames && resamp->time >= phases)
      {
         /* Push in reverse to make filter more obvious. */
-         if (!re->ptr)
-            re->ptr = taps;
-         re->ptr--;
+         if (!resamp->ptr)
+            resamp->ptr = taps;
+         resamp->ptr--;

-         re->buffer_l[re->ptr + taps] =
-         re->buffer_l[re->ptr       ] = *input++;
+         resamp->buffer_l[resamp->ptr + taps] =
+            resamp->buffer_l[resamp->ptr]     = *input++;

-         re->buffer_r[re->ptr + taps] =
-         re->buffer_r[re->ptr       ] = *input++;
+         resamp->buffer_r[resamp->ptr + taps] =
+            resamp->buffer_r[resamp->ptr]     = *input++;

-         re->time                    -= phases;
+         resamp->time                        -= phases;
         frames--;
      }

      {
-         const float *buffer_l    = re->buffer_l + re->ptr;
-         const float *buffer_r    = re->buffer_r + re->ptr;
-         while (re->time < phases)
+         const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+         const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+         while (resamp->time < phases)
         {
-            int i;
-            unsigned phase           = re->time >> re->subphase_bits;
-            const float *phase_table = re->phase_table + phase * taps * 2;
+            unsigned phase           = resamp->time >> resamp->subphase_bits;
+            const float *phase_table = resamp->phase_table + phase * taps * 2;
            const float *delta_table = phase_table + taps;
-            float32x4_t delta        = vdupq_n_f32((re->time & re->subphase_mask) * re->subphase_mod);
+            float32x4_t delta        = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
+            int i;
            float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
            float32x2_t p3, p4;

@ -164,7 +164,7 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
            vst1_f32(output, vpadd_f32(p3, p4));
            output                 += 2;
            out_frames++;
-            re->time           += ratio;
+            resamp->time           += ratio;
         }
      }
   }
@ -176,47 +176,47 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
 /* Assumes that taps >= 8, and that taps is a multiple of 8. */
 static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);
   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

   while (frames)
   {
-      while (frames && re->time >= phases)
+      while (frames && resamp->time >= phases)
      {
         /* Push in reverse to make filter more obvious. */
-         if (!re->ptr)
-            re->ptr = taps;
-         re->ptr--;
+         if (!resamp->ptr)
+            resamp->ptr = taps;
+         resamp->ptr--;

-         re->buffer_l[re->ptr + taps] =
-         re->buffer_l[re->ptr       ]  = *input++;
+         resamp->buffer_l[resamp->ptr + taps] =
+            resamp->buffer_l[resamp->ptr]     = *input++;

-         re->buffer_r[re->ptr + taps] =
-         re->buffer_r[re->ptr       ] = *input++;
+         resamp->buffer_r[resamp->ptr + taps] =
+            resamp->buffer_r[resamp->ptr]     = *input++;

-         re->time                    -= phases;
+         resamp->time                        -= phases;
         frames--;
      }

      {
-         const float *buffer_l    = re->buffer_l + re->ptr;
-         const float *buffer_r    = re->buffer_r + re->ptr;
-         while (re->time < phases)
+         const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+         const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+         while (resamp->time < phases)
         {
-            unsigned phase           = re->time >> re->subphase_bits;
-            const float *phase_table = re->phase_table + phase * taps;
+            unsigned phase           = resamp->time >> resamp->subphase_bits;
+            const float *phase_table = resamp->phase_table + phase * taps;
 #ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
            process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps);
 #else
            int i;
-            float32x2_t p3, p4;
            float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
+            float32x2_t p3, p4;

            for (i = 0; i < (int)taps; i += 8)
            {
@ -234,9 +234,9 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
            p4 = vadd_f32(vget_low_f32(p2), vget_high_f32(p2));
            vst1_f32(output, vpadd_f32(p3, p4));
 #endif
+            output                 += 2;
            out_frames++;
-            output   += 2;
-            re->time += ratio;
+            resamp->time           += ratio;
         }
      }
   }
@ -248,47 +248,49 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
 #if defined(__AVX__)
 static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

+   {
      while (frames)
      {
-         while (frames && re->time >= phases)
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps] =
-            re->buffer_l[re->ptr       ] = *input++;
+            resamp->buffer_l[resamp->ptr + taps] =
+               resamp->buffer_l[resamp->ptr]     = *input++;

-            re->buffer_r[re->ptr + taps] =
-            re->buffer_r[re->ptr       ] = *input++;
+            resamp->buffer_r[resamp->ptr + taps] =
+               resamp->buffer_r[resamp->ptr]     = *input++;

-            re->time -= phases;
+            resamp->time                                -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
-               unsigned phase           = re->time >> re->subphase_bits;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;

-               float *phase_table       = re->phase_table + phase * taps * 2;
-               float *delta_table       = phase_table     + taps;
+               float *phase_table       = resamp->phase_table + phase * taps * 2;
+               float *delta_table       = phase_table + taps;
               __m256 delta             = _mm256_set1_ps((float)
-                     (re->time & re->subphase_mask) * re->subphase_mod);
+                     (resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
+
               __m256 sum_l             = _mm256_setzero_ps();
               __m256 sum_r             = _mm256_setzero_ps();

@ -318,56 +320,58 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
               _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
               _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));

+               output += 2;
               out_frames++;
-               output   += 2;
-               re->time += ratio;
+               resamp->time += ratio;
            }
         }
      }
+   }

   data->output_frames = out_frames;
 }

 static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

+   {
      while (frames)
      {
-         while (frames && re->time >= phases)
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps] =
-            re->buffer_l[re->ptr       ] = *input++;
+            resamp->buffer_l[resamp->ptr + taps] =
+               resamp->buffer_l[resamp->ptr]     = *input++;

-            re->buffer_r[re->ptr + taps] =
-            re->buffer_r[re->ptr       ] = *input++;
+            resamp->buffer_r[resamp->ptr + taps] =
+               resamp->buffer_r[resamp->ptr]     = *input++;

-            re->time                    -= phases;
+            resamp->time                        -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
               __m256 delta;
-               unsigned phase           = re->time >> re->subphase_bits;
-               float *phase_table       = re->phase_table + phase * taps;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;
+               float *phase_table       = resamp->phase_table + phase * taps;

               __m256 sum_l             = _mm256_setzero_ps();
               __m256 sum_r             = _mm256_setzero_ps();
@ -396,12 +400,13 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
               _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
               _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));

+               output += 2;
               out_frames++;
-               output   += 2;
-               re->time += ratio;
+               resamp->time += ratio;
            }
         }
      }
+   }

   data->output_frames = out_frames;
 }
@ -410,47 +415,48 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
 #if defined(__SSE__)
 static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

+   {
      while (frames)
      {
-         while (frames && re->time >= phases)
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps] =
-            re->buffer_l[re->ptr       ] = *input++;
+            resamp->buffer_l[resamp->ptr + taps] =
+               resamp->buffer_l[resamp->ptr]     = *input++;

-            re->buffer_r[re->ptr + taps] =
-            re->buffer_r[re->ptr       ] = *input++;
+            resamp->buffer_r[resamp->ptr + taps] =
+               resamp->buffer_r[resamp->ptr]     = *input++;

-            re->time                    -= phases;
+            resamp->time                        -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
               __m128 sum;
-               unsigned phase           = re->time >> re->subphase_bits;
-               float *phase_table       = re->phase_table + phase * taps * 2;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;
+               float *phase_table       = resamp->phase_table + phase * taps * 2;
               float *delta_table       = phase_table + taps;
               __m128 delta             = _mm_set1_ps((float)
-                     (re->time & re->subphase_mask) * re->subphase_mod);
+                     (resamp->time & resamp->subphase_mask) * resamp->subphase_mod);

               __m128 sum_l             = _mm_setzero_ps();
               __m128 sum_r             = _mm_setzero_ps();
@ -491,56 +497,58 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
               /* movehl { X, R, X, L } == { X, R, X, R } */
               _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));

+               output += 2;
               out_frames++;
-               output   += 2;
-               re->time += ratio;
+               resamp->time += ratio;
            }
         }
      }
+   }

   data->output_frames = out_frames;
 }

 static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

+   {
      while (frames)
      {
-         while (frames && re->time >= phases)
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps] =
-            re->buffer_l[re->ptr       ] = *input++;
+            resamp->buffer_l[resamp->ptr + taps] =
+               resamp->buffer_l[resamp->ptr]     = *input++;

-            re->buffer_r[re->ptr + taps] =
-            re->buffer_r[re->ptr       ] = *input++;
+            resamp->buffer_r[resamp->ptr + taps] =
+               resamp->buffer_r[resamp->ptr]     = *input++;

-            re->time                    -= phases;
+            resamp->time                        -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
               __m128 sum;
-               unsigned phase           = re->time >> re->subphase_bits;
-               float *phase_table       = re->phase_table + phase * taps;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;
+               float *phase_table       = resamp->phase_table + phase * taps;

               __m128 sum_l             = _mm_setzero_ps();
               __m128 sum_r             = _mm_setzero_ps();
@ -559,9 +567,9 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
                * sum_r = { r3, r2, r1, r0 }
                */

-               sum     = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
-                         _MM_SHUFFLE(1, 0, 1, 0)),
-                         _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
+               sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
+                        _MM_SHUFFLE(1, 0, 1, 0)),
+                     _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));

               /* sum   = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
                * sum   = { R1, R0, L1, L0 }
@ -579,12 +587,13 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
               /* movehl { X, R, X, L } == { X, R, X, R } */
               _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));

+               output += 2;
               out_frames++;
-               output   += 2;
-               re->time += ratio;
+               resamp->time += ratio;
            }
         }
      }
+   }

   data->output_frames = out_frames;
 }
@ -592,48 +601,49 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)

 static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

-   while (frames)
   {
-         while (frames && re->time >= phases)
+      while (frames)
+      {
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps]    =
-            re->buffer_l[re->ptr       ]    = *input++;
+            resamp->buffer_l[resamp->ptr + taps]    =
+               resamp->buffer_l[resamp->ptr]        = *input++;

-            re->buffer_r[re->ptr + taps]    =
-            re->buffer_r[re->ptr       ]    = *input++;
+            resamp->buffer_r[resamp->ptr + taps]    =
+               resamp->buffer_r[resamp->ptr]        = *input++;

-            re->time                       -= phases;
+            resamp->time                           -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
               float sum_l              = 0.0f;
               float sum_r              = 0.0f;
-               unsigned phase           = re->time >> re->subphase_bits;
-               float *phase_table       = re->phase_table + phase * taps * 2;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;
+               float *phase_table       = resamp->phase_table + phase * taps * 2;
               float *delta_table       = phase_table + taps;
               float delta              = (float)
-                  (re->time & re->subphase_mask) * re->subphase_mod;
+                  (resamp->time & resamp->subphase_mask) * resamp->subphase_mod;

               for (i = 0; i < (int)taps; i++)
               {
@ -648,9 +658,11 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da

               output                  += 2;
               out_frames++;
-               re->time            += ratio;
+               resamp->time            += ratio;
            }
         }
+
+      }
   }

   data->output_frames = out_frames;
@ -658,45 +670,46 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da

 static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
 {
-   rarch_sinc_resampler_t *re     = (rarch_sinc_resampler_t*)re_;
-   unsigned phases                = 1 << (re->phase_bits + re->subphase_bits);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
+   unsigned phases                = 1 << (resamp->phase_bits + resamp->subphase_bits);

   uint32_t ratio                 = phases / data->ratio;
   const float *input             = data->data_in;
   float *output                  = data->data_out;
   size_t frames                  = data->input_frames;
   size_t out_frames              = 0;
-   unsigned taps                  = re->taps;
+   unsigned taps                  = resamp->taps;

+   {
      while (frames)
      {
-         while (frames && re->time >= phases)
+         while (frames && resamp->time >= phases)
         {
            /* Push in reverse to make filter more obvious. */
-            if (!re->ptr)
-               re->ptr = taps;
-            re->ptr--;
+            if (!resamp->ptr)
+               resamp->ptr = taps;
+            resamp->ptr--;

-            re->buffer_l[re->ptr + taps]    =
-            re->buffer_l[re->ptr       ]    = *input++;
+            resamp->buffer_l[resamp->ptr + taps]    =
+               resamp->buffer_l[resamp->ptr]        = *input++;

-            re->buffer_r[re->ptr + taps]    =
-            re->buffer_r[re->ptr       ]    = *input++;
+            resamp->buffer_r[resamp->ptr + taps]    =
+               resamp->buffer_r[resamp->ptr]        = *input++;

-            re->time                       -= phases;
+            resamp->time                           -= phases;
            frames--;
         }

         {
-            const float *buffer_l    = re->buffer_l + re->ptr;
-            const float *buffer_r    = re->buffer_r + re->ptr;
-            while (re->time < phases)
+            const float *buffer_l    = resamp->buffer_l + resamp->ptr;
+            const float *buffer_r    = resamp->buffer_r + resamp->ptr;
+            while (resamp->time < phases)
            {
               int i;
               float sum_l              = 0.0f;
               float sum_r              = 0.0f;
-               unsigned phase           = re->time >> re->subphase_bits;
-               float *phase_table       = re->phase_table + phase * taps;
+               unsigned phase           = resamp->time >> resamp->subphase_bits;
+               float *phase_table       = resamp->phase_table + phase * taps;

               for (i = 0; i < (int)taps; i++)
               {
@ -711,30 +724,31 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)

               output                  += 2;
               out_frames++;
-               re->time            += ratio;
+               resamp->time            += ratio;
            }
         }

      }
+   }

   data->output_frames = out_frames;
 }

 static void resampler_sinc_free(void *data)
 {
-   rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)data;
-   if (re)
-      memalign_free(re->main_buffer);
-   free(re);
+   rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)data;
+   if (resamp)
+      memalign_free(resamp->main_buffer);
+   free(resamp);
 }

-static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
+static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
      double cutoff,
      float *phase_table, int phases, int taps, bool calculate_delta)
 {
   int i, j;
   /* Kaiser window function - need to normalize w(0) to 1.0f */
-   float kaiser_beta    = re->kaiser_beta;
+   float kaiser_beta    = resamp->kaiser_beta;
   double    window_mod = besseli0(kaiser_beta);
   int           stride = calculate_delta ? 2 : 1;
   double     sidelobes = taps / 2.0;
@ -743,10 +757,13 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
   {
      for (j = 0; j < taps; j++)
      {
+         float val;
+         double sinc_phase;
         int               n = j * phases + i;
-         double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
-         double sinc_phase   = sidelobes * window_phase;
-         float val           = cutoff * sinc(M_PI * sinc_phase * cutoff) *
+         double window_phase = (double)n / (phases * taps); /* [0, 1). */
+         window_phase        = 2.0 * window_phase - 1.0; /* [-1, 1) */
+         sinc_phase          = sidelobes * window_phase;
+         val                 = cutoff * sinc(M_PI * sinc_phase * cutoff) *
              besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase))
            / window_mod;
         phase_table[i * stride * taps + j] = val;
@ -771,20 +788,24 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
      phase = phases - 1;
      for (j = 0; j < taps; j++)
      {
+         float val, delta;
+         double sinc_phase;
         int n               = j * phases + (phase + 1);
-         double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
-         double sinc_phase   = sidelobes * window_phase;
-         float val   = cutoff * sinc(M_PI * sinc_phase * cutoff)
-                 * besseli0(re->kaiser_beta * sqrtf(1 - window_phase
-                 * window_phase)) / window_mod;
-         float delta = (val - phase_table[phase * stride * taps + j]);
+         double window_phase = (double)n / (phases * taps); /* (0, 1]. */
+         window_phase        = 2.0 * window_phase - 1.0; /* (-1, 1] */
+         sinc_phase          = sidelobes * window_phase;
+
+         val                 = cutoff * sinc(M_PI * sinc_phase * cutoff) *
+              besseli0(resamp->kaiser_beta * sqrtf(1 - window_phase *
+window_phase)) / window_mod;
+         delta = (val - phase_table[phase * stride * taps + j]);
         phase_table[(phase * stride + 1) * taps + j] = delta;
      }
   }
 }

 static void sinc_init_table_lanczos(
-      rarch_sinc_resampler_t *re, double cutoff,
+      rarch_sinc_resampler_t *resamp, double cutoff,
      float *phase_table, int phases, int taps, bool calculate_delta)
 {
   int i, j;
@ -797,10 +818,13 @@ static void sinc_init_table_lanczos(
   {
      for (j = 0; j < taps; j++)
      {
+         double sinc_phase;
+         float val;
         int               n = j * phases + i;
-         double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; /* [-1, 1) */
-         double sinc_phase   = sidelobes * window_phase;
-         float val           = cutoff * sinc(M_PI * sinc_phase * cutoff) *
+         double window_phase = (double)n / (phases * taps); /* [0, 1). */
+         window_phase        = 2.0 * window_phase - 1.0; /* [-1, 1) */
+         sinc_phase          = sidelobes * window_phase;
+         val                 = cutoff * sinc(M_PI * sinc_phase * cutoff) *
            sinc(M_PI * window_phase) / window_mod;
         phase_table[i * stride * taps + j] = val;
      }
@ -824,12 +848,16 @@ static void sinc_init_table_lanczos(
      phase = phases - 1;
      for (j = 0; j < taps; j++)
      {
+         float val, delta;
+         double sinc_phase;
         int n               = j * phases + (phase + 1);
-         double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
-         double sinc_phase   = sidelobes * window_phase;
-         float val           = cutoff * sinc(M_PI * sinc_phase * cutoff) *
+         double window_phase = (double)n / (phases * taps); /* (0, 1]. */
+         window_phase        = 2.0 * window_phase - 1.0; /* (-1, 1] */
+         sinc_phase          = sidelobes * window_phase;
+
+         val                 = cutoff * sinc(M_PI * sinc_phase * cutoff) *
            sinc(M_PI * window_phase) / window_mod;
-         float delta         = (val - phase_table[phase * stride * taps + j]);
+         delta = (val - phase_table[phase * stride * taps + j]);
         phase_table[(phase * stride + 1) * taps + j] = delta;
      }
   }
--- a/libretro-common/compat/compat_strl.c
+++ b/libretro-common/compat/compat_strl.c
@ -20,14 +20,14 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

-/* Implementation of strlcpy()/strlcat() based on OpenBSD. */
-
-#ifndef __MACH__
 #include <stdlib.h>
 #include <ctype.h>

 #include <compat/strl.h>

+/* Implementation of strlcpy()/strlcat() based on OpenBSD. */
+
+#ifndef __MACH__
 size_t strlcpy(char *s, const char *source, size_t len)
 {
   size_t _len  = len;
@ -52,4 +52,4 @@ size_t strlcat(char *s, const char *source, size_t len)
      len -= _len;
   return _len + strlcpy(s, source, len);
 }
-#endif
+#endif
--- a/libretro-common/file/file_path.c
+++ b/libretro-common/file/file_path.c
@ -353,25 +353,10 @@ size_t fill_pathname(char *s, const char *in_path,
 **/
 char *find_last_slash(const char *str)
 {
-   const char *p;
-   const char *last_slash     = NULL;
-   const char *last_backslash = NULL;
-
-   /* Traverse the string once */
-   for (p = str; *p != '\0'; ++p)
-   {
-      if (*p == '/')
-         last_slash = p; /* Update last forward slash */
-      else if (*p == '\\')
-         last_backslash = p; /* Update last backslash */
-   }
-
-   /* Determine which one is last */
-   if (!last_slash) /* Backslash */
-      return (char*)last_backslash;
-   if (!last_backslash) /* Forward slash */
-      return (char*)last_slash;
-   return (last_backslash > last_slash) ? (char*)last_backslash : (char*)last_slash;
+   const char *slash     = strrchr(str, '/');
+   const char *backslash = strrchr(str, '\\');
+   char       *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
+   return last_slash;
 }

 /**
@ -384,15 +369,14 @@ char *find_last_slash(const char *str)
 **/
 size_t fill_pathname_slash(char *s, size_t len)
 {
-   char       *last_slash = find_last_slash(s);
-   len                    = strlen(s);
+   const char *slash      = strrchr(s, '/');
+   const char *backslash  = strrchr(s, '\\');
+   char       *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   if (!last_slash)
-   {
-      s[  len] = PATH_DEFAULT_SLASH_C();
-      s[++len] = '\0';
-   }
+      return strlcat(s, PATH_DEFAULT_SLASH(), len);
+   len         = strlen(s);
   /* Try to preserve slash type. */
-   else if (last_slash != (s + len - 1))
+   if (last_slash != (s + len - 1))
   {
      s[  len] = last_slash[0];
      s[++len] = '\0';
@ -478,12 +462,16 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len)
 {
   size_t _len           = 0;
   char *tmp             = strdup(in_dir);
-   char *last_slash      = find_last_slash(tmp);
+   const char *slash     = strrchr(tmp, '/');
+   const char *backslash = strrchr(tmp, '\\');
+   char *last_slash      = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;

   if (last_slash && last_slash[1] == 0)
   {
      *last_slash        = '\0';
-      last_slash         = find_last_slash(tmp);
+      slash              = strrchr(tmp, '/');
+      backslash          = strrchr(tmp, '\\');
+      last_slash         = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   }

   /* Cut the last part of the string (the filename) after the slash,
@ -494,7 +482,9 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len)
   /* Point in_dir to the address of the last slash.
    * If in_dir is NULL, it means there was no slash in tmp,
    * so use tmp as-is. */
-   in_dir                = find_last_slash(tmp);
+   slash                 = strrchr(tmp, '/');
+   backslash             = strrchr(tmp, '\\');
+   in_dir                = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   if (!in_dir)
       in_dir            = tmp;

@ -602,19 +592,23 @@ size_t fill_str_dated_filename(char *s,
 **/
 size_t path_basedir(char *s)
 {
+   const char *slash;
+   const char *backslash;
   char *last_slash = NULL;
   if (!s || s[0] == '\0' || s[1] == '\0')
      return (s && s[0] != '\0') ? 1 : 0;
-   last_slash       = find_last_slash(s);
-   if (!last_slash)
+   slash            = strrchr(s, '/');
+   backslash        = strrchr(s, '\\');
+   last_slash       = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
+   if (last_slash)
   {
-      s[0]          = '.';
-      s[1]          = PATH_DEFAULT_SLASH_C();
-      s[2]          = '\0';
-      return 2;
+      last_slash[1] = '\0';
+      return last_slash + 1 - s;
   }
-   last_slash[1] = '\0';
-   return last_slash + 1 - s;
+   s[0]             = '.';
+   s[1]             = PATH_DEFAULT_SLASH_C();
+   s[2]             = '\0';
+   return 2;
 }

 /**
@ -636,9 +630,15 @@ size_t path_parent_dir(char *s, size_t len)
   if (len && PATH_CHAR_IS_SLASH(s[len - 1]))
   {
      char *last_slash;
+      const char *slash;
+      const char *backslash;
      bool was_absolute = path_is_absolute(s);
+
      s[len - 1]        = '\0';
-      last_slash        = find_last_slash(s);
+
+      slash             = strrchr(s, '/');
+      backslash         = strrchr(s, '\\');
+      last_slash        = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;

      if (was_absolute && !last_slash)
      {
@ -667,7 +667,9 @@ const char *path_basename(const char *path)
   /* We cut either at the first compression-related hash,
    * or we cut at the last slash */
   const char *ptr       = NULL;
-   char *last_slash      = find_last_slash(path);
+   const char *slash     = strrchr(path, '/');
+   const char *backslash = strrchr(path, '\\');
+   char *last_slash      = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   return ((ptr = path_get_archive_delim(path)) || (ptr = last_slash))
      ? (ptr + 1) : path;
 }
@ -685,7 +687,9 @@ const char *path_basename(const char *path)
 const char *path_basename_nocompression(const char *path)
 {
   /* We cut at the last slash */
-   char *last_slash = find_last_slash(path);
+   const char *slash     = strrchr(path, '/');
+   const char *backslash = strrchr(path, '\\');
+   char *last_slash      = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   return (last_slash) ? (last_slash + 1) : path;
 }

@ -995,18 +999,22 @@ size_t fill_pathname_join_special(char *s,

   if (*s)
   {
-      char *last_slash       = find_last_slash(s);
-      if (!last_slash)
+      const char *slash      = strrchr(s, '/');
+      const char *backslash  = strrchr(s, '\\');
+      char *last_slash       = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
+      if (last_slash)
      {
-         s[  _len] = PATH_DEFAULT_SLASH_C();
-         s[++_len] = '\0';
-
+         /* Try to preserve slash type. */
+         if (last_slash != (s + _len - 1))
+         {
+            s[  _len] = last_slash[0];
+            s[++_len] = '\0';
+         }
      }
-      /* Try to preserve slash type. */
-      else if (last_slash != (s + _len - 1))
+      else
      {
-         s[  _len] = last_slash[0];
-         s[++_len] = '\0';
+         s[  _len]    = PATH_DEFAULT_SLASH_C();
+         s[++_len]    = '\0';
      }
   }

@ -1319,6 +1327,8 @@ size_t fill_pathname_abbreviated_or_relative(char *s,
 **/
 void path_basedir_wrapper(char *s)
 {
+   const char *slash;
+   const char *backslash;
   char *last_slash = NULL;
   if (!s || s[0] == '\0' || s[1] == '\0')
      return;
@ -1327,7 +1337,9 @@ void path_basedir_wrapper(char *s)
   if ((last_slash  = (char*)path_get_archive_delim(s)))
      *last_slash   = '\0';
 #endif
-   last_slash       = find_last_slash(s);
+   slash            = strrchr(s, '/');
+   backslash        = strrchr(s, '\\');
+   last_slash       = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
   if (last_slash)
      last_slash[1] = '\0';
   else
--- a/libretro-common/include/file/file_path.h
+++ b/libretro-common/include/file/file_path.h
@ -628,6 +628,7 @@ void path_basedir_wrapper(char *s);
 * if not already there.

 * Hidden non-leaf function cost:
+ * - can call strlcat once if it returns false
 * - calls strlen
 **/
 size_t fill_pathname_slash(char *s, size_t len);
--- a/libretro-common/lists/file_list.c
+++ b/libretro-common/lists/file_list.c
@ -57,17 +57,16 @@ static bool file_list_deinitialize_internal(file_list_t *list)

 bool file_list_reserve(file_list_t *list, size_t nitems)
 {
-   struct item_file *new_data;
   const size_t item_size = sizeof(struct item_file);
+   struct item_file *new_data;

-   if (nitems < list->capacity || nitems > (size_t)-1 / item_size)
+   if (nitems < list->capacity || nitems > (size_t)-1/item_size)
      return false;

-   /* Allocate the new memory block */
   if (!(new_data = (struct item_file*)realloc(list->list, nitems * item_size)))
      return false;

-   memset(new_data + list->capacity, 0, (nitems - list->capacity) * item_size);
+   memset(&new_data[list->capacity], 0, item_size * (nitems - list->capacity));

   list->list     = new_data;
   list->capacity = nitems;
@ -78,31 +77,57 @@ bool file_list_reserve(file_list_t *list, size_t nitems)
 bool file_list_insert(file_list_t *list,
      const char *path, const char *label,
      unsigned type, size_t directory_ptr,
-      size_t entry_idx, size_t idx)
+      size_t entry_idx,
+      size_t idx)
 {
-   struct item_file *new_item;
+   int i;
+
   /* Expand file list if needed */
-   if (      list->size >= list->capacity
-         && !file_list_reserve(list, list->capacity * 2 + 1))
-      return false;
+   if (list->size >= list->capacity)
+      if (!file_list_reserve(list, list->capacity * 2 + 1))
+         return false;

-   /* Shift elements to make room for the new item */
-   memmove(&list->list[idx + 1],
-         &list->list[idx],
-         (list->size - idx) * sizeof(struct item_file));
+   for (i = (unsigned)list->size; i > (int)idx; i--)
+   {
+      struct item_file *copy = (struct item_file*)
+         malloc(sizeof(struct item_file));

-   /* Initialize the new item */
-   new_item                = &list->list[idx];
-   new_item->path          = path ? strdup(path) : NULL;
-   new_item->label         = label ? strdup(label) : NULL;
-   new_item->alt           = NULL;
-   new_item->type          = type;
-   new_item->directory_ptr = directory_ptr;
-   new_item->entry_idx     = entry_idx;
-   new_item->userdata      = NULL;
-   new_item->actiondata    = NULL;
+      if (copy)
+      {
+         copy->path             = NULL;
+         copy->label            = NULL;
+         copy->alt              = NULL;
+         copy->type             = 0;
+         copy->directory_ptr    = 0;
+         copy->entry_idx        = 0;
+         copy->userdata         = NULL;
+         copy->actiondata       = NULL;
+
+         memcpy(copy, &list->list[i-1], sizeof(struct item_file));
+
+         memcpy(&list->list[i-1], &list->list[i], sizeof(struct item_file));
+         memcpy(&list->list[i],             copy, sizeof(struct item_file));
+
+         free(copy);
+      }
+   }
+
+   list->list[idx].path          = NULL;
+   list->list[idx].label         = NULL;
+   list->list[idx].alt           = NULL;
+   list->list[idx].type          = type;
+   list->list[idx].directory_ptr = directory_ptr;
+   list->list[idx].entry_idx     = entry_idx;
+   list->list[idx].userdata      = NULL;
+   list->list[idx].actiondata    = NULL;
+
+   if (label)
+      list->list[idx].label      = strdup(label);
+   if (path)
+      list->list[idx].path       = strdup(path);

   list->size++;
+
   return true;
 }