diff --git a/gfx/gfx_animation.c b/gfx/gfx_animation.c index 3e09fb83c6..07c91389ba 100644 --- a/gfx/gfx_animation.c +++ b/gfx/gfx_animation.c @@ -314,8 +314,6 @@ static void gfx_animation_ticker_loop(uint64_t idx, size_t *offset2, size_t *width2, size_t *offset3, size_t *width3) { - int offset2_val; - /* Calculate ticker period and phase */ int ticker_period = (int)(str_width + spacer_width); int phase = idx % ticker_period; @@ -334,30 +332,42 @@ static void gfx_animation_ticker_loop(uint64_t idx, * all the string manipulation that has to happen afterwards... */ - /* Initialize offsets and widths */ - *offset1 = (phase < (int)str_width) ? (size_t)phase : 0; - *width1 = (size_t)((phase < (int)str_width) ? (str_width - phase) : 0); + /* String 1 */ + int offset = 0; + int width = (int)(str_width - phase); + if (width < 0) + width = 0; + else if ((width > (int)max_width)) + width = (int)max_width; - /* Clamp width1 to max_width */ - if (*width1 > max_width) - *width1 = max_width; + if (phase < (int)str_width) + offset = phase; - /* Calculate offset2 and width2 */ - offset2_val = (int)(phase - str_width); - *offset2 = (offset2_val < 0) ? 0 : (size_t)offset2_val; + *offset1 = offset; + *width1 = width; - *width2 = (size_t)(max_width - *width1); - if (*width2 > spacer_width) - *width2 = spacer_width; - *width2 -= *offset2; + /* String 2 */ + offset = (int)(phase - str_width); + if (offset < 0) + offset = 0; + width = (int)(max_width - *width1); + if (width > (int)spacer_width) + width = (int)spacer_width; + width -= offset; - /* Calculate width3 */ - *width3 = (size_t)(max_width - (*width1 + *width2)); - if (*width3 < 0) - *width3 = 0; + *offset2 = offset; + *width2 = width; - /* offset3 is always zero */ - *offset3 = 0; + /* String 3 */ + width = (int)(max_width - (*width1 + *width2)); + if (width < 0) + width = 0; + + /* Note: offset is always zero here so offset3 is + * unnecessary - but include it anyway to preserve + * symmetry... */ + *offset3 = 0; + *width3 = width; } static unsigned get_ticker_smooth_generic_scroll_offset( diff --git a/gfx/video_driver.c b/gfx/video_driver.c index 233aa370bf..e787f89dc6 100644 --- a/gfx/video_driver.c +++ b/gfx/video_driver.c @@ -2113,71 +2113,73 @@ void video_viewport_get_scaled_aspect2(struct video_viewport *vp, float device_aspect, float desired_aspect) { settings_t *settings = config_get_ptr(); - video_driver_state_t *video_st = &video_driver_st; - int x = 0, y = 0; - float vp_bias_x = settings->floats.video_vp_bias_x; - float vp_bias_y = settings->floats.video_vp_bias_y; - + video_driver_state_t + *video_st = &video_driver_st; + int x = 0; + int y = 0; + float vp_bias_x = settings->floats.video_vp_bias_x; + float vp_bias_y = settings->floats.video_vp_bias_y; #if defined(RARCH_MOBILE) if (vp_width < vp_height) { - vp_bias_x = settings->floats.video_vp_bias_portrait_x; - vp_bias_y = settings->floats.video_vp_bias_portrait_y; + vp_bias_x = settings->floats.video_vp_bias_portrait_x; + vp_bias_y = settings->floats.video_vp_bias_portrait_y; } #endif - if (!y_down) - vp_bias_y = 1.0f - vp_bias_y; + vp_bias_y = 1.0 - vp_bias_y; if (settings->uints.video_aspect_ratio_idx == ASPECT_RATIO_CUSTOM) { - video_viewport_t *custom_vp = &settings->video_vp_custom; - int padding_x = vp_width - custom_vp->width; - int padding_y = vp_height - custom_vp->height; + video_viewport_t + *custom_vp = &settings->video_vp_custom; + int padding_x = 0; + int padding_y = 0; - x = custom_vp->x; - y = custom_vp->y; + x = custom_vp->x; + y = custom_vp->y; if (!y_down) - y = vp->full_height - (y + custom_vp->height); - - /* Adjust padding directly without checking negative */ + y = vp->full_height - (y + custom_vp->height); + padding_x += (vp_width - custom_vp->width); if (padding_x < 0) - padding_x = -padding_x; + padding_x *= 2; + padding_y = vp_height - custom_vp->height; if (padding_y < 0) - padding_y = -padding_y; - - vp_width = custom_vp->width; - vp_height = custom_vp->height; - - x += (int)(padding_x * vp_bias_x); - y += (int)(padding_y * vp_bias_y); + padding_y *= 2; + vp_width = custom_vp->width; + vp_height = custom_vp->height; + x += padding_x * vp_bias_x; + y += padding_y * vp_bias_y; } else { float delta; - float aspect_diff = fabsf(device_aspect - desired_aspect); - if (aspect_diff >= 0.0001f) + if (fabsf(device_aspect - desired_aspect) < 0.0001f) { - if (device_aspect > desired_aspect) - { - delta = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f; - x += (int)roundf(vp_width * (0.5f - delta) * vp_bias_x * 2.0f); - vp_width = (unsigned)roundf(vp_width * delta * 2.0f); - } - else - { - delta = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f; - y += (int)roundf(vp_height * (0.5f - delta) * vp_bias_y * 2.0f); - vp_height = (unsigned)roundf(vp_height * delta * 2.0f); - } + /* If the aspect ratios of screen and desired aspect + * ratio are sufficiently equal (floating point stuff), + * assume they are actually equal. + */ + } + else if (device_aspect > desired_aspect) + { + delta = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f; + x += (int)roundf(vp_width * ((0.5f - delta) * (vp_bias_x * 2.0f))); + vp_width = (unsigned)roundf(2.0f * vp_width * delta); + } + else + { + delta = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f; + y += (int)roundf(vp_height * ((0.5f - delta) * (vp_bias_y * 2.0f))); + vp_height = (unsigned)roundf(2.0f * vp_height * delta); } } - vp->x = x; - vp->y = y; - vp->width = vp_width; + vp->x = x; + vp->y = y; + vp->width = vp_width; vp->height = vp_height; /* Statistics */ diff --git a/libretro-common/audio/conversion/float_to_s16.c b/libretro-common/audio/conversion/float_to_s16.c index e16b6543b4..5f2ab2405a 100644 --- a/libretro-common/audio/conversion/float_to_s16.c +++ b/libretro-common/audio/conversion/float_to_s16.c @@ -109,11 +109,13 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len) _mm_storeu_si128((__m128i *)s, packed); /* Then put the result in the output array */ } - len -= i; + len = len - i; i = 0; /* If there are any stray samples at the end, we need to convert them * (maybe the original array didn't contain a multiple of 8 samples) */ #elif defined(__ALTIVEC__) + int samples_in = len; + /* Unaligned loads/store is a bit expensive, * so we optimize for the good path (very likely). */ if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0) @@ -128,11 +130,20 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len) vec_st(vec_packs(result0, result1), 0, s); } - len -= i; + samples_in -= i; } + len = samples_in; i = 0; #elif defined(_MIPS_ARCH_ALLEGREX) +#ifdef DEBUG + /* Make sure the buffers are 16 byte aligned, this should be + * the default behaviour of malloc in the PSPSDK. + * Assume alignment. */ + retro_assert(((uintptr_t)in & 0xf) == 0); + retro_assert(((uintptr_t)s & 0xf) == 0); +#endif + for (i = 0; i + 8 <= len; i += 8) { __asm__ ( diff --git a/libretro-common/audio/conversion/s16_to_float.c b/libretro-common/audio/conversion/s16_to_float.c index 1a28e3c0e8..2850193cf2 100644 --- a/libretro-common/audio/conversion/s16_to_float.c +++ b/libretro-common/audio/conversion/s16_to_float.c @@ -111,9 +111,11 @@ void convert_s16_to_float(float *s, _mm_storeu_ps(s + 4, output_r); } - len -= i; + len = len - i; i = 0; #elif defined(__ALTIVEC__) + size_t samples_in = len; + /* Unaligned loads/store is a bit expensive, so we * optimize for the good path (very likely). */ if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0) @@ -133,15 +135,23 @@ void convert_s16_to_float(float *s, vec_st(out_lo, 16, s); } - len -= i; + samples_in -= i; } + len = samples_in; i = 0; #endif gain /= 0x8000; #if defined(_MIPS_ARCH_ALLEGREX) +#ifdef DEBUG + /* Make sure the buffer is 16 byte aligned, this should be the + * default behaviour of malloc in the PSPSDK. + * Only the output buffer can be assumed to be 16-byte aligned. */ + retro_assert(((uintptr_t)s & 0xf) == 0); +#endif + __asm__ ( ".set push \n" ".set noreorder \n" diff --git a/libretro-common/audio/resampler/drivers/sinc_resampler.c b/libretro-common/audio/resampler/drivers/sinc_resampler.c index bd549e85a3..226d8aeb8b 100644 --- a/libretro-common/audio/resampler/drivers/sinc_resampler.c +++ b/libretro-common/audio/resampler/drivers/sinc_resampler.c @@ -103,43 +103,43 @@ void process_sinc_neon_asm(float *out, const float *left, */ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - int i; - unsigned phase = re->time >> re->subphase_bits; - const float *phase_table = re->phase_table + phase * taps * 2; + unsigned phase = resamp->time >> resamp->subphase_bits; + const float *phase_table = resamp->phase_table + phase * taps * 2; const float *delta_table = phase_table + taps; - float32x4_t delta = vdupq_n_f32((re->time & re->subphase_mask) * re->subphase_mod); + float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod); + int i; float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0}; float32x2_t p3, p4; @@ -164,7 +164,7 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data vst1_f32(output, vpadd_f32(p3, p4)); output += 2; out_frames++; - re->time += ratio; + resamp->time += ratio; } } } @@ -176,47 +176,47 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data /* Assumes that taps >= 8, and that taps is a multiple of 8. */ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - unsigned phase = re->time >> re->subphase_bits; - const float *phase_table = re->phase_table + phase * taps; + unsigned phase = resamp->time >> resamp->subphase_bits; + const float *phase_table = resamp->phase_table + phase * taps; #ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps); #else int i; - float32x2_t p3, p4; float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0}; + float32x2_t p3, p4; for (i = 0; i < (int)taps; i += 8) { @@ -234,9 +234,9 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data) p4 = vadd_f32(vget_low_f32(p2), vget_high_f32(p2)); vst1_f32(output, vpadd_f32(p3, p4)); #endif + output += 2; out_frames++; - output += 2; - re->time += ratio; + resamp->time += ratio; } } } @@ -248,47 +248,49 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data) #if defined(__AVX__) static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; + { while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; - unsigned phase = re->time >> re->subphase_bits; + unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = re->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; + float *phase_table = resamp->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; __m256 delta = _mm256_set1_ps((float) - (re->time & re->subphase_mask) * re->subphase_mod); + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); + __m256 sum_l = _mm256_setzero_ps(); __m256 sum_r = _mm256_setzero_ps(); @@ -318,56 +320,58 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data * _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + output += 2; out_frames++; - output += 2; - re->time += ratio; + resamp->time += ratio; } } } + } data->output_frames = out_frames; } static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; + { while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; __m256 delta; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; __m256 sum_l = _mm256_setzero_ps(); __m256 sum_r = _mm256_setzero_ps(); @@ -396,12 +400,13 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + output += 2; out_frames++; - output += 2; - re->time += ratio; + resamp->time += ratio; } } } + } data->output_frames = out_frames; } @@ -410,47 +415,48 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) #if defined(__SSE__) static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; + { while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; __m128 sum; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps * 2; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps * 2; float *delta_table = phase_table + taps; __m128 delta = _mm_set1_ps((float) - (re->time & re->subphase_mask) * re->subphase_mod); + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); __m128 sum_l = _mm_setzero_ps(); __m128 sum_r = _mm_setzero_ps(); @@ -491,56 +497,58 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data * /* movehl { X, R, X, L } == { X, R, X, R } */ _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + output += 2; out_frames++; - output += 2; - re->time += ratio; + resamp->time += ratio; } } } + } data->output_frames = out_frames; } static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; + { while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; __m128 sum; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; __m128 sum_l = _mm_setzero_ps(); __m128 sum_r = _mm_setzero_ps(); @@ -559,9 +567,9 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) * sum_r = { r3, r2, r1, r0 } */ - sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, - _MM_SHUFFLE(1, 0, 1, 0)), - _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, + _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } * sum = { R1, R0, L1, L0 } @@ -579,12 +587,13 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) /* movehl { X, R, X, L } == { X, R, X, R } */ _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + output += 2; out_frames++; - output += 2; - re->time += ratio; + resamp->time += ratio; } } } + } data->output_frames = out_frames; } @@ -592,48 +601,49 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; - while (frames) { - while (frames && re->time >= phases) + while (frames) + { + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; float sum_l = 0.0f; float sum_r = 0.0f; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps * 2; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps * 2; float *delta_table = phase_table + taps; float delta = (float) - (re->time & re->subphase_mask) * re->subphase_mod; + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod; for (i = 0; i < (int)taps; i++) { @@ -648,9 +658,11 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da output += 2; out_frames++; - re->time += ratio; + resamp->time += ratio; } } + + } } data->output_frames = out_frames; @@ -658,45 +670,46 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da static void resampler_sinc_process_c(void *re_, struct resampler_data *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (re->phase_bits + re->subphase_bits); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); uint32_t ratio = phases / data->ratio; const float *input = data->data_in; float *output = data->data_out; size_t frames = data->input_frames; size_t out_frames = 0; - unsigned taps = re->taps; + unsigned taps = resamp->taps; + { while (frames) { - while (frames && re->time >= phases) + while (frames && resamp->time >= phases) { /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; - re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; - re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; - re->time -= phases; + resamp->time -= phases; frames--; } { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { int i; float sum_l = 0.0f; float sum_r = 0.0f; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; for (i = 0; i < (int)taps; i++) { @@ -711,30 +724,31 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data) output += 2; out_frames++; - re->time += ratio; + resamp->time += ratio; } } } + } data->output_frames = out_frames; } static void resampler_sinc_free(void *data) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)data; - if (re) - memalign_free(re->main_buffer); - free(re); + rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)data; + if (resamp) + memalign_free(resamp->main_buffer); + free(resamp); } -static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re, +static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp, double cutoff, float *phase_table, int phases, int taps, bool calculate_delta) { int i, j; /* Kaiser window function - need to normalize w(0) to 1.0f */ - float kaiser_beta = re->kaiser_beta; + float kaiser_beta = resamp->kaiser_beta; double window_mod = besseli0(kaiser_beta); int stride = calculate_delta ? 2 : 1; double sidelobes = taps / 2.0; @@ -743,10 +757,13 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re, { for (j = 0; j < taps; j++) { + float val; + double sinc_phase; int n = j * phases + i; - double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; - double sinc_phase = sidelobes * window_phase; - float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * + double window_phase = (double)n / (phases * taps); /* [0, 1). */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = sidelobes * window_phase; + val = cutoff * sinc(M_PI * sinc_phase * cutoff) * besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase)) / window_mod; phase_table[i * stride * taps + j] = val; @@ -771,20 +788,24 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re, phase = phases - 1; for (j = 0; j < taps; j++) { + float val, delta; + double sinc_phase; int n = j * phases + (phase + 1); - double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; - double sinc_phase = sidelobes * window_phase; - float val = cutoff * sinc(M_PI * sinc_phase * cutoff) - * besseli0(re->kaiser_beta * sqrtf(1 - window_phase - * window_phase)) / window_mod; - float delta = (val - phase_table[phase * stride * taps + j]); + double window_phase = (double)n / (phases * taps); /* (0, 1]. */ + window_phase = 2.0 * window_phase - 1.0; /* (-1, 1] */ + sinc_phase = sidelobes * window_phase; + + val = cutoff * sinc(M_PI * sinc_phase * cutoff) * + besseli0(resamp->kaiser_beta * sqrtf(1 - window_phase * +window_phase)) / window_mod; + delta = (val - phase_table[phase * stride * taps + j]); phase_table[(phase * stride + 1) * taps + j] = delta; } } } static void sinc_init_table_lanczos( - rarch_sinc_resampler_t *re, double cutoff, + rarch_sinc_resampler_t *resamp, double cutoff, float *phase_table, int phases, int taps, bool calculate_delta) { int i, j; @@ -797,10 +818,13 @@ static void sinc_init_table_lanczos( { for (j = 0; j < taps; j++) { + double sinc_phase; + float val; int n = j * phases + i; - double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; /* [-1, 1) */ - double sinc_phase = sidelobes * window_phase; - float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * + double window_phase = (double)n / (phases * taps); /* [0, 1). */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = sidelobes * window_phase; + val = cutoff * sinc(M_PI * sinc_phase * cutoff) * sinc(M_PI * window_phase) / window_mod; phase_table[i * stride * taps + j] = val; } @@ -824,12 +848,16 @@ static void sinc_init_table_lanczos( phase = phases - 1; for (j = 0; j < taps; j++) { + float val, delta; + double sinc_phase; int n = j * phases + (phase + 1); - double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; - double sinc_phase = sidelobes * window_phase; - float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * + double window_phase = (double)n / (phases * taps); /* (0, 1]. */ + window_phase = 2.0 * window_phase - 1.0; /* (-1, 1] */ + sinc_phase = sidelobes * window_phase; + + val = cutoff * sinc(M_PI * sinc_phase * cutoff) * sinc(M_PI * window_phase) / window_mod; - float delta = (val - phase_table[phase * stride * taps + j]); + delta = (val - phase_table[phase * stride * taps + j]); phase_table[(phase * stride + 1) * taps + j] = delta; } } diff --git a/libretro-common/compat/compat_strl.c b/libretro-common/compat/compat_strl.c index cdf7c49cbe..d11a8c6699 100644 --- a/libretro-common/compat/compat_strl.c +++ b/libretro-common/compat/compat_strl.c @@ -20,14 +20,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* Implementation of strlcpy()/strlcat() based on OpenBSD. */ - -#ifndef __MACH__ #include #include #include +/* Implementation of strlcpy()/strlcat() based on OpenBSD. */ + +#ifndef __MACH__ size_t strlcpy(char *s, const char *source, size_t len) { size_t _len = len; @@ -52,4 +52,4 @@ size_t strlcat(char *s, const char *source, size_t len) len -= _len; return _len + strlcpy(s, source, len); } -#endif \ No newline at end of file +#endif diff --git a/libretro-common/file/file_path.c b/libretro-common/file/file_path.c index 8a02e57c34..9b7def5796 100644 --- a/libretro-common/file/file_path.c +++ b/libretro-common/file/file_path.c @@ -353,25 +353,10 @@ size_t fill_pathname(char *s, const char *in_path, **/ char *find_last_slash(const char *str) { - const char *p; - const char *last_slash = NULL; - const char *last_backslash = NULL; - - /* Traverse the string once */ - for (p = str; *p != '\0'; ++p) - { - if (*p == '/') - last_slash = p; /* Update last forward slash */ - else if (*p == '\\') - last_backslash = p; /* Update last backslash */ - } - - /* Determine which one is last */ - if (!last_slash) /* Backslash */ - return (char*)last_backslash; - if (!last_backslash) /* Forward slash */ - return (char*)last_slash; - return (last_backslash > last_slash) ? (char*)last_backslash : (char*)last_slash; + const char *slash = strrchr(str, '/'); + const char *backslash = strrchr(str, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; + return last_slash; } /** @@ -384,15 +369,14 @@ char *find_last_slash(const char *str) **/ size_t fill_pathname_slash(char *s, size_t len) { - char *last_slash = find_last_slash(s); - len = strlen(s); + const char *slash = strrchr(s, '/'); + const char *backslash = strrchr(s, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (!last_slash) - { - s[ len] = PATH_DEFAULT_SLASH_C(); - s[++len] = '\0'; - } + return strlcat(s, PATH_DEFAULT_SLASH(), len); + len = strlen(s); /* Try to preserve slash type. */ - else if (last_slash != (s + len - 1)) + if (last_slash != (s + len - 1)) { s[ len] = last_slash[0]; s[++len] = '\0'; @@ -478,12 +462,16 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len) { size_t _len = 0; char *tmp = strdup(in_dir); - char *last_slash = find_last_slash(tmp); + const char *slash = strrchr(tmp, '/'); + const char *backslash = strrchr(tmp, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash && last_slash[1] == 0) { *last_slash = '\0'; - last_slash = find_last_slash(tmp); + slash = strrchr(tmp, '/'); + backslash = strrchr(tmp, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; } /* Cut the last part of the string (the filename) after the slash, @@ -494,7 +482,9 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len) /* Point in_dir to the address of the last slash. * If in_dir is NULL, it means there was no slash in tmp, * so use tmp as-is. */ - in_dir = find_last_slash(tmp); + slash = strrchr(tmp, '/'); + backslash = strrchr(tmp, '\\'); + in_dir = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (!in_dir) in_dir = tmp; @@ -602,19 +592,23 @@ size_t fill_str_dated_filename(char *s, **/ size_t path_basedir(char *s) { + const char *slash; + const char *backslash; char *last_slash = NULL; if (!s || s[0] == '\0' || s[1] == '\0') return (s && s[0] != '\0') ? 1 : 0; - last_slash = find_last_slash(s); - if (!last_slash) + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; + if (last_slash) { - s[0] = '.'; - s[1] = PATH_DEFAULT_SLASH_C(); - s[2] = '\0'; - return 2; + last_slash[1] = '\0'; + return last_slash + 1 - s; } - last_slash[1] = '\0'; - return last_slash + 1 - s; + s[0] = '.'; + s[1] = PATH_DEFAULT_SLASH_C(); + s[2] = '\0'; + return 2; } /** @@ -636,9 +630,15 @@ size_t path_parent_dir(char *s, size_t len) if (len && PATH_CHAR_IS_SLASH(s[len - 1])) { char *last_slash; + const char *slash; + const char *backslash; bool was_absolute = path_is_absolute(s); + s[len - 1] = '\0'; - last_slash = find_last_slash(s); + + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (was_absolute && !last_slash) { @@ -667,7 +667,9 @@ const char *path_basename(const char *path) /* We cut either at the first compression-related hash, * or we cut at the last slash */ const char *ptr = NULL; - char *last_slash = find_last_slash(path); + const char *slash = strrchr(path, '/'); + const char *backslash = strrchr(path, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; return ((ptr = path_get_archive_delim(path)) || (ptr = last_slash)) ? (ptr + 1) : path; } @@ -685,7 +687,9 @@ const char *path_basename(const char *path) const char *path_basename_nocompression(const char *path) { /* We cut at the last slash */ - char *last_slash = find_last_slash(path); + const char *slash = strrchr(path, '/'); + const char *backslash = strrchr(path, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; return (last_slash) ? (last_slash + 1) : path; } @@ -995,18 +999,22 @@ size_t fill_pathname_join_special(char *s, if (*s) { - char *last_slash = find_last_slash(s); - if (!last_slash) + const char *slash = strrchr(s, '/'); + const char *backslash = strrchr(s, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; + if (last_slash) { - s[ _len] = PATH_DEFAULT_SLASH_C(); - s[++_len] = '\0'; - + /* Try to preserve slash type. */ + if (last_slash != (s + _len - 1)) + { + s[ _len] = last_slash[0]; + s[++_len] = '\0'; + } } - /* Try to preserve slash type. */ - else if (last_slash != (s + _len - 1)) + else { - s[ _len] = last_slash[0]; - s[++_len] = '\0'; + s[ _len] = PATH_DEFAULT_SLASH_C(); + s[++_len] = '\0'; } } @@ -1319,6 +1327,8 @@ size_t fill_pathname_abbreviated_or_relative(char *s, **/ void path_basedir_wrapper(char *s) { + const char *slash; + const char *backslash; char *last_slash = NULL; if (!s || s[0] == '\0' || s[1] == '\0') return; @@ -1327,7 +1337,9 @@ void path_basedir_wrapper(char *s) if ((last_slash = (char*)path_get_archive_delim(s))) *last_slash = '\0'; #endif - last_slash = find_last_slash(s); + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash) last_slash[1] = '\0'; else diff --git a/libretro-common/include/file/file_path.h b/libretro-common/include/file/file_path.h index 733e94bd93..708bdb5170 100644 --- a/libretro-common/include/file/file_path.h +++ b/libretro-common/include/file/file_path.h @@ -628,6 +628,7 @@ void path_basedir_wrapper(char *s); * if not already there. * Hidden non-leaf function cost: + * - can call strlcat once if it returns false * - calls strlen **/ size_t fill_pathname_slash(char *s, size_t len); diff --git a/libretro-common/lists/file_list.c b/libretro-common/lists/file_list.c index f2afc5b5d1..3f50563e95 100644 --- a/libretro-common/lists/file_list.c +++ b/libretro-common/lists/file_list.c @@ -57,17 +57,16 @@ static bool file_list_deinitialize_internal(file_list_t *list) bool file_list_reserve(file_list_t *list, size_t nitems) { - struct item_file *new_data; const size_t item_size = sizeof(struct item_file); + struct item_file *new_data; - if (nitems < list->capacity || nitems > (size_t)-1 / item_size) + if (nitems < list->capacity || nitems > (size_t)-1/item_size) return false; - /* Allocate the new memory block */ if (!(new_data = (struct item_file*)realloc(list->list, nitems * item_size))) return false; - memset(new_data + list->capacity, 0, (nitems - list->capacity) * item_size); + memset(&new_data[list->capacity], 0, item_size * (nitems - list->capacity)); list->list = new_data; list->capacity = nitems; @@ -78,31 +77,57 @@ bool file_list_reserve(file_list_t *list, size_t nitems) bool file_list_insert(file_list_t *list, const char *path, const char *label, unsigned type, size_t directory_ptr, - size_t entry_idx, size_t idx) + size_t entry_idx, + size_t idx) { - struct item_file *new_item; + int i; + /* Expand file list if needed */ - if ( list->size >= list->capacity - && !file_list_reserve(list, list->capacity * 2 + 1)) - return false; + if (list->size >= list->capacity) + if (!file_list_reserve(list, list->capacity * 2 + 1)) + return false; - /* Shift elements to make room for the new item */ - memmove(&list->list[idx + 1], - &list->list[idx], - (list->size - idx) * sizeof(struct item_file)); + for (i = (unsigned)list->size; i > (int)idx; i--) + { + struct item_file *copy = (struct item_file*) + malloc(sizeof(struct item_file)); - /* Initialize the new item */ - new_item = &list->list[idx]; - new_item->path = path ? strdup(path) : NULL; - new_item->label = label ? strdup(label) : NULL; - new_item->alt = NULL; - new_item->type = type; - new_item->directory_ptr = directory_ptr; - new_item->entry_idx = entry_idx; - new_item->userdata = NULL; - new_item->actiondata = NULL; + if (copy) + { + copy->path = NULL; + copy->label = NULL; + copy->alt = NULL; + copy->type = 0; + copy->directory_ptr = 0; + copy->entry_idx = 0; + copy->userdata = NULL; + copy->actiondata = NULL; + + memcpy(copy, &list->list[i-1], sizeof(struct item_file)); + + memcpy(&list->list[i-1], &list->list[i], sizeof(struct item_file)); + memcpy(&list->list[i], copy, sizeof(struct item_file)); + + free(copy); + } + } + + list->list[idx].path = NULL; + list->list[idx].label = NULL; + list->list[idx].alt = NULL; + list->list[idx].type = type; + list->list[idx].directory_ptr = directory_ptr; + list->list[idx].entry_idx = entry_idx; + list->list[idx].userdata = NULL; + list->list[idx].actiondata = NULL; + + if (label) + list->list[idx].label = strdup(label); + if (path) + list->list[idx].path = strdup(path); list->size++; + return true; }