From 215219dc516d6d721e61cddcfb5daeeaed6e0474 Mon Sep 17 00:00:00 2001 From: libretroadmin Date: Mon, 14 Jul 2025 05:50:17 +0200 Subject: [PATCH] Change libretro-common code back --- .../audio/resampler/drivers/sinc_resampler.c | 630 +++++++++--------- libretro-common/compat/compat_strl.c | 22 +- libretro-common/file/file_path.c | 125 ++-- libretro-common/file/nbio/nbio_windowsmmap.c | 49 +- libretro-common/gfx/scaler/scaler_filter.c | 52 +- libretro-common/gfx/scaler/scaler_int.c | 61 +- libretro-common/include/file/file_path.h | 2 + libretro-common/lists/file_list.c | 148 ++-- 8 files changed, 571 insertions(+), 518 deletions(-) diff --git a/libretro-common/audio/resampler/drivers/sinc_resampler.c b/libretro-common/audio/resampler/drivers/sinc_resampler.c index 3d3967436f..bd549e85a3 100644 --- a/libretro-common/audio/resampler/drivers/sinc_resampler.c +++ b/libretro-common/audio/resampler/drivers/sinc_resampler.c @@ -121,10 +121,10 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data re->ptr--; re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + re->buffer_l[re->ptr ] = *input++; re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + re->buffer_r[re->ptr ] = *input++; re->time -= phases; frames--; @@ -195,10 +195,10 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data) re->ptr--; re->buffer_l[re->ptr + taps] = - re->buffer_l[re->ptr ] = *input++; + re->buffer_l[re->ptr ] = *input++; re->buffer_r[re->ptr + taps] = - re->buffer_r[re->ptr ] = *input++; + re->buffer_r[re->ptr ] = *input++; re->time -= phases; frames--; @@ -258,72 +258,72 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data * size_t out_frames = 0; unsigned taps = re->taps; - while (frames) - { - while (frames && re->time >= phases) + while (frames) { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } + re->time -= phases; + frames--; + } - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) { - int i; - unsigned phase = re->time >> re->subphase_bits; - - float *phase_table = re->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - __m256 delta = _mm256_set1_ps((float) - (re->time & re->subphase_mask) * re->subphase_mod); - __m256 sum_l = _mm256_setzero_ps(); - __m256 sum_r = _mm256_setzero_ps(); - - for (i = 0; i < (int)taps; i += 8) + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) { - __m256 buf_l = _mm256_loadu_ps(buffer_l + i); - __m256 buf_r = _mm256_loadu_ps(buffer_r + i); - __m256 deltas = _mm256_load_ps(delta_table + i); - __m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i), - _mm256_mul_ps(deltas, delta)); + int i; + unsigned phase = re->time >> re->subphase_bits; - sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); - sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); + float *phase_table = re->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + __m256 delta = _mm256_set1_ps((float) + (re->time & re->subphase_mask) * re->subphase_mod); + __m256 sum_l = _mm256_setzero_ps(); + __m256 sum_r = _mm256_setzero_ps(); + + for (i = 0; i < (int)taps; i += 8) + { + __m256 buf_l = _mm256_loadu_ps(buffer_l + i); + __m256 buf_r = _mm256_loadu_ps(buffer_r + i); + __m256 deltas = _mm256_load_ps(delta_table + i); + __m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i), + _mm256_mul_ps(deltas, delta)); + + sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); + sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); + } + + /* hadd on AVX is weird, and acts on low-lanes + * and high-lanes separately. */ + __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); + __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); + res_l = _mm256_hadd_ps(res_l, res_l); + res_r = _mm256_hadd_ps(res_r, res_r); + res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); + res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); + + /* This is optimized to mov %xmmN, [mem]. + * There doesn't seem to be any _mm256_store_ss intrinsic. */ + _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); + _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + + out_frames++; + output += 2; + re->time += ratio; } - - /* hadd on AVX is weird, and acts on low-lanes - * and high-lanes separately. */ - __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); - __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); - res_l = _mm256_hadd_ps(res_l, res_l); - res_r = _mm256_hadd_ps(res_r, res_r); - res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); - res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); - - /* This is optimized to mov %xmmN, [mem]. - * There doesn't seem to be any _mm256_store_ss intrinsic. */ - _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); - _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); - - out_frames++; - output += 2; - re->time += ratio; } } - } data->output_frames = out_frames; } @@ -340,68 +340,68 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) size_t out_frames = 0; unsigned taps = re->taps; - while (frames) - { - while (frames && re->time >= phases) + while (frames) { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } + re->time -= phases; + frames--; + } - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) { - int i; - __m256 delta; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; - - __m256 sum_l = _mm256_setzero_ps(); - __m256 sum_r = _mm256_setzero_ps(); - - for (i = 0; i < (int)taps; i += 8) + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) { - __m256 buf_l = _mm256_loadu_ps(buffer_l + i); - __m256 buf_r = _mm256_loadu_ps(buffer_r + i); - __m256 sinc = _mm256_load_ps((const float*)phase_table + i); + int i; + __m256 delta; + unsigned phase = re->time >> re->subphase_bits; + float *phase_table = re->phase_table + phase * taps; - sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); - sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); + __m256 sum_l = _mm256_setzero_ps(); + __m256 sum_r = _mm256_setzero_ps(); + + for (i = 0; i < (int)taps; i += 8) + { + __m256 buf_l = _mm256_loadu_ps(buffer_l + i); + __m256 buf_r = _mm256_loadu_ps(buffer_r + i); + __m256 sinc = _mm256_load_ps((const float*)phase_table + i); + + sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); + sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); + } + + /* hadd on AVX is weird, and acts on low-lanes + * and high-lanes separately. */ + __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); + __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); + res_l = _mm256_hadd_ps(res_l, res_l); + res_r = _mm256_hadd_ps(res_r, res_r); + res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); + res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); + + /* This is optimized to mov %xmmN, [mem]. + * There doesn't seem to be any _mm256_store_ss intrinsic. */ + _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); + _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + + out_frames++; + output += 2; + re->time += ratio; } - - /* hadd on AVX is weird, and acts on low-lanes - * and high-lanes separately. */ - __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); - __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); - res_l = _mm256_hadd_ps(res_l, res_l); - res_r = _mm256_hadd_ps(res_r, res_r); - res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); - res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); - - /* This is optimized to mov %xmmN, [mem]. - * There doesn't seem to be any _mm256_store_ss intrinsic. */ - _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); - _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); - - out_frames++; - output += 2; - re->time += ratio; } } - } data->output_frames = out_frames; } @@ -420,83 +420,83 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data * size_t out_frames = 0; unsigned taps = re->taps; - while (frames) - { - while (frames && re->time >= phases) + while (frames) { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } + re->time -= phases; + frames--; + } - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) { - int i; - __m128 sum; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - __m128 delta = _mm_set1_ps((float) - (re->time & re->subphase_mask) * re->subphase_mod); - - __m128 sum_l = _mm_setzero_ps(); - __m128 sum_r = _mm_setzero_ps(); - - for (i = 0; i < (int)taps; i += 4) + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) { - __m128 buf_l = _mm_loadu_ps(buffer_l + i); - __m128 buf_r = _mm_loadu_ps(buffer_r + i); - __m128 deltas = _mm_load_ps(delta_table + i); - __m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i), - _mm_mul_ps(deltas, delta)); - sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); - sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); + int i; + __m128 sum; + unsigned phase = re->time >> re->subphase_bits; + float *phase_table = re->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + __m128 delta = _mm_set1_ps((float) + (re->time & re->subphase_mask) * re->subphase_mod); + + __m128 sum_l = _mm_setzero_ps(); + __m128 sum_r = _mm_setzero_ps(); + + for (i = 0; i < (int)taps; i += 4) + { + __m128 buf_l = _mm_loadu_ps(buffer_l + i); + __m128 buf_r = _mm_loadu_ps(buffer_r + i); + __m128 deltas = _mm_load_ps(delta_table + i); + __m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i), + _mm_mul_ps(deltas, delta)); + sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); + sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); + } + + /* Them annoying shuffles. + * sum_l = { l3, l2, l1, l0 } + * sum_r = { r3, r2, r1, r0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, + _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + + /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } + * sum = { R1, R0, L1, L0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); + + /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } + * sum = { X, R, X, L } + */ + + /* Store L */ + _mm_store_ss(output + 0, sum); + + /* movehl { X, R, X, L } == { X, R, X, R } */ + _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + + out_frames++; + output += 2; + re->time += ratio; } - - /* Them annoying shuffles. - * sum_l = { l3, l2, l1, l0 } - * sum_r = { r3, r2, r1, r0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, - _MM_SHUFFLE(1, 0, 1, 0)), - _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); - - /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } - * sum = { R1, R0, L1, L0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); - - /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } - * sum = { X, R, X, L } - */ - - /* Store L */ - _mm_store_ss(output + 0, sum); - - /* movehl { X, R, X, L } == { X, R, X, R } */ - _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); - - out_frames++; - output += 2; - re->time += ratio; } } - } data->output_frames = out_frames; } @@ -513,78 +513,78 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) size_t out_frames = 0; unsigned taps = re->taps; - while (frames) - { - while (frames && re->time >= phases) + while (frames) { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } + re->time -= phases; + frames--; + } - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) { - int i; - __m128 sum; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; - - __m128 sum_l = _mm_setzero_ps(); - __m128 sum_r = _mm_setzero_ps(); - - for (i = 0; i < (int)taps; i += 4) + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) { - __m128 buf_l = _mm_loadu_ps(buffer_l + i); - __m128 buf_r = _mm_loadu_ps(buffer_r + i); - __m128 _sinc = _mm_load_ps((const float*)phase_table + i); - sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); - sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); + int i; + __m128 sum; + unsigned phase = re->time >> re->subphase_bits; + float *phase_table = re->phase_table + phase * taps; + + __m128 sum_l = _mm_setzero_ps(); + __m128 sum_r = _mm_setzero_ps(); + + for (i = 0; i < (int)taps; i += 4) + { + __m128 buf_l = _mm_loadu_ps(buffer_l + i); + __m128 buf_r = _mm_loadu_ps(buffer_r + i); + __m128 _sinc = _mm_load_ps((const float*)phase_table + i); + sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); + sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); + } + + /* Them annoying shuffles. + * sum_l = { l3, l2, l1, l0 } + * sum_r = { r3, r2, r1, r0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, + _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + + /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } + * sum = { R1, R0, L1, L0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); + + /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } + * sum = { X, R, X, L } + */ + + /* Store L */ + _mm_store_ss(output + 0, sum); + + /* movehl { X, R, X, L } == { X, R, X, R } */ + _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + + out_frames++; + output += 2; + re->time += ratio; } - - /* Them annoying shuffles. - * sum_l = { l3, l2, l1, l0 } - * sum_r = { r3, r2, r1, r0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, - _MM_SHUFFLE(1, 0, 1, 0)), - _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); - - /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } - * sum = { R1, R0, L1, L0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); - - /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } - * sum = { X, R, X, L } - */ - - /* Store L */ - _mm_store_ss(output + 0, sum); - - /* movehl { X, R, X, L } == { X, R, X, R } */ - _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); - - out_frames++; - output += 2; - re->time += ratio; } } - } data->output_frames = out_frames; } @@ -604,53 +604,53 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da while (frames) { - while (frames && re->time >= phases) - { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } - - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) - { - int i; - float sum_l = 0.0f; - float sum_r = 0.0f; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - float delta = (float) - (re->time & re->subphase_mask) * re->subphase_mod; - - for (i = 0; i < (int)taps; i++) - { - float sinc_val = phase_table[i] + delta_table[i] * delta; - - sum_l += buffer_l[i] * sinc_val; - sum_r += buffer_r[i] * sinc_val; - } - - output[0] = sum_l; - output[1] = sum_r; - - output += 2; - out_frames++; - re->time += ratio; + re->time -= phases; + frames--; + } + + { + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) + { + int i; + float sum_l = 0.0f; + float sum_r = 0.0f; + unsigned phase = re->time >> re->subphase_bits; + float *phase_table = re->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + float delta = (float) + (re->time & re->subphase_mask) * re->subphase_mod; + + for (i = 0; i < (int)taps; i++) + { + float sinc_val = phase_table[i] + delta_table[i] * delta; + + sum_l += buffer_l[i] * sinc_val; + sum_r += buffer_r[i] * sinc_val; + } + + output[0] = sum_l; + output[1] = sum_r; + + output += 2; + out_frames++; + re->time += ratio; + } } - } } data->output_frames = out_frames; @@ -668,54 +668,54 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data) size_t out_frames = 0; unsigned taps = re->taps; - while (frames) - { - while (frames && re->time >= phases) + while (frames) { - /* Push in reverse to make filter more obvious. */ - if (!re->ptr) - re->ptr = taps; - re->ptr--; + while (frames && re->time >= phases) + { + /* Push in reverse to make filter more obvious. */ + if (!re->ptr) + re->ptr = taps; + re->ptr--; - re->buffer_l[re->ptr + taps] = + re->buffer_l[re->ptr + taps] = re->buffer_l[re->ptr ] = *input++; - re->buffer_r[re->ptr + taps] = + re->buffer_r[re->ptr + taps] = re->buffer_r[re->ptr ] = *input++; - re->time -= phases; - frames--; - } - - { - const float *buffer_l = re->buffer_l + re->ptr; - const float *buffer_r = re->buffer_r + re->ptr; - while (re->time < phases) - { - int i; - float sum_l = 0.0f; - float sum_r = 0.0f; - unsigned phase = re->time >> re->subphase_bits; - float *phase_table = re->phase_table + phase * taps; - - for (i = 0; i < (int)taps; i++) - { - float sinc_val = phase_table[i]; - - sum_l += buffer_l[i] * sinc_val; - sum_r += buffer_r[i] * sinc_val; - } - - output[0] = sum_l; - output[1] = sum_r; - - output += 2; - out_frames++; - re->time += ratio; + re->time -= phases; + frames--; } - } - } + { + const float *buffer_l = re->buffer_l + re->ptr; + const float *buffer_r = re->buffer_r + re->ptr; + while (re->time < phases) + { + int i; + float sum_l = 0.0f; + float sum_r = 0.0f; + unsigned phase = re->time >> re->subphase_bits; + float *phase_table = re->phase_table + phase * taps; + + for (i = 0; i < (int)taps; i++) + { + float sinc_val = phase_table[i]; + + sum_l += buffer_l[i] * sinc_val; + sum_r += buffer_r[i] * sinc_val; + } + + output[0] = sum_l; + output[1] = sum_r; + + output += 2; + out_frames++; + re->time += ratio; + } + } + + } data->output_frames = out_frames; } @@ -747,7 +747,7 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re, double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; double sinc_phase = sidelobes * window_phase; float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * - besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase)) + besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase)) / window_mod; phase_table[i * stride * taps + j] = val; } @@ -775,8 +775,8 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re, double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; double sinc_phase = sidelobes * window_phase; float val = cutoff * sinc(M_PI * sinc_phase * cutoff) - * besseli0(re->kaiser_beta * sqrtf(1 - window_phase - * window_phase)) / window_mod; + * besseli0(re->kaiser_beta * sqrtf(1 - window_phase + * window_phase)) / window_mod; float delta = (val - phase_table[phase * stride * taps + j]); phase_table[(phase * stride + 1) * taps + j] = delta; } diff --git a/libretro-common/compat/compat_strl.c b/libretro-common/compat/compat_strl.c index 884f30ef83..cdf7c49cbe 100644 --- a/libretro-common/compat/compat_strl.c +++ b/libretro-common/compat/compat_strl.c @@ -28,18 +28,18 @@ #include -size_t strlcpy(char *s, const char *src, size_t len) +size_t strlcpy(char *s, const char *source, size_t len) { - size_t i, _len = 0, __len; - while (src[_len] != '\0') - _len++; - if (len == 0) - return _len; - __len = (_len >= len) ? len - 1 : _len; - for (i = 0; i < __len; i++) - s[i] = src[i]; - s[__len] = '\0'; - return _len; + size_t _len = len; + size_t __len = 0; + if (_len) + while (--_len && (*s++ = *source++)) __len++; + if (!_len) + { + if (len) *s = '\0'; + while (*source++) __len++; + } + return __len; } size_t strlcat(char *s, const char *source, size_t len) diff --git a/libretro-common/file/file_path.c b/libretro-common/file/file_path.c index 9fc4c0848e..9b7def5796 100644 --- a/libretro-common/file/file_path.c +++ b/libretro-common/file/file_path.c @@ -179,22 +179,45 @@ void path_linked_list_add_path(struct path_linked_list *in_path_llist, **/ const char *path_get_archive_delim(const char *path) { - const char *delim = strchr(path, '#'); + char buf[5]; + /* Find delimiter position + * > Since filenames may contain '#' characters, + * must loop until we find the first '#' that + * is directly *after* a compression extension */ + const char *delim = strchr(path, '#'); + while (delim) { - size_t _len = delim - path; - if (_len >= 4) + /* Check whether this is a known archive type + * > Note: The code duplication here is + * deliberate, to maximise performance */ + if (delim - path > 4) { - if ( string_is_equal(path + _len - 4, ".zip") - || string_is_equal(path + _len - 4, ".apk")) + strlcpy(buf, delim - 4, sizeof(buf)); + buf[4] = '\0'; + + string_to_lower(buf); + + /* Check if this is a '.zip', '.apk' or '.7z' file */ + if ( string_is_equal(buf, ".zip") + || string_is_equal(buf, ".apk") + || string_is_equal(buf + 1, ".7z")) return delim; } - if (_len >= 3) + else if (delim - path > 3) { - if (string_is_equal(path + _len - 3, ".7z")) + strlcpy(buf, delim - 3, sizeof(buf)); + buf[3] = '\0'; + + string_to_lower(buf); + + /* Check if this is a '.7z' file */ + if (string_is_equal(buf, ".7z")) return delim; } - delim = strchr(delim + 1, '#'); + + delim++; + delim = strchr(delim, '#'); } return NULL; @@ -310,8 +333,7 @@ size_t fill_pathname(char *s, const char *in_path, size_t _len = strlcpy(s, in_path, len); if ((tok = (char*)strrchr(path_basename(s), '.'))) { - *tok = '\0'; - _len = tok - s; + *tok = '\0'; _len = tok - s; } _len += strlcpy(s + _len, replace, len - _len); return _len; @@ -331,7 +353,10 @@ size_t fill_pathname(char *s, const char *in_path, **/ char *find_last_slash(const char *str) { - return strrchr(str, '/') ? strrchr(str, '/') : strrchr(str, '\\'); + const char *slash = strrchr(str, '/'); + const char *backslash = strrchr(str, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; + return last_slash; } /** @@ -344,23 +369,19 @@ char *find_last_slash(const char *str) **/ size_t fill_pathname_slash(char *s, size_t len) { - size_t _len = strlen(s); - const char *last_slash = strrchr(s, '/') ? strrchr(s, '/') : strrchr(s, '\\'); + const char *slash = strrchr(s, '/'); + const char *backslash = strrchr(s, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (!last_slash) + return strlcat(s, PATH_DEFAULT_SLASH(), len); + len = strlen(s); + /* Try to preserve slash type. */ + if (last_slash != (s + len - 1)) { - /* If there's no slash found, append a backslash */ - s[_len] = PATH_DEFAULT_SLASH_C(); - s[_len + 1] = '\0'; - _len++; + s[ len] = last_slash[0]; + s[++len] = '\0'; } - else if (last_slash != (s + _len - 1)) - { - /* Try to preserve slash type. */ - s[_len] = last_slash[0]; - s[_len + 1] = '\0'; - _len++; - } - return _len; + return len; } /** @@ -439,14 +460,18 @@ size_t fill_pathname_basedir(char *s, const char *in_path, size_t len) **/ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len) { - size_t _len = 0; - char *tmp = strdup(in_dir); - char *last_slash = strrchr(tmp, '/') ? strrchr(tmp, '/') : strrchr(tmp, '\\'); + size_t _len = 0; + char *tmp = strdup(in_dir); + const char *slash = strrchr(tmp, '/'); + const char *backslash = strrchr(tmp, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash && last_slash[1] == 0) { - *last_slash = '\0'; - last_slash = strrchr(tmp, '/') ? strrchr(tmp, '/') : strrchr(tmp, '\\'); + *last_slash = '\0'; + slash = strrchr(tmp, '/'); + backslash = strrchr(tmp, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; } /* Cut the last part of the string (the filename) after the slash, @@ -457,9 +482,11 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len) /* Point in_dir to the address of the last slash. * If in_dir is NULL, it means there was no slash in tmp, * so use tmp as-is. */ - in_dir = strrchr(tmp, '/') ? strrchr(tmp, '/') : strrchr(tmp, '\\'); + slash = strrchr(tmp, '/'); + backslash = strrchr(tmp, '\\'); + in_dir = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (!in_dir) - in_dir = tmp; + in_dir = tmp; if (in_dir && in_dir[1]) { @@ -545,7 +572,7 @@ size_t fill_str_dated_filename(char *s, rtime_localtime(&cur_time, &tm_); _len = strlcpy(s, in_str, len); if (string_is_empty(ext)) - _len += strftime(s + _len, len - _len, "-%y%m%d-%H%M%S", &tm_); + _len += strftime(s + _len, len - _len, "-%y%m%d-%H%M%S", &tm_); else { _len += strftime(s + _len, len - _len, "-%y%m%d-%H%M%S.", &tm_); @@ -565,10 +592,14 @@ size_t fill_str_dated_filename(char *s, **/ size_t path_basedir(char *s) { + const char *slash; + const char *backslash; char *last_slash = NULL; if (!s || s[0] == '\0' || s[1] == '\0') return (s && s[0] != '\0') ? 1 : 0; - last_slash = strrchr(s, '/') ? strrchr(s, '/') : strrchr(s, '\\'); + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash) { last_slash[1] = '\0'; @@ -599,9 +630,15 @@ size_t path_parent_dir(char *s, size_t len) if (len && PATH_CHAR_IS_SLASH(s[len - 1])) { char *last_slash; + const char *slash; + const char *backslash; bool was_absolute = path_is_absolute(s); + s[len - 1] = '\0'; - last_slash = strrchr(s, '/') ? strrchr(s, '/') : strrchr(s, '\\'); + + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (was_absolute && !last_slash) { @@ -630,7 +667,9 @@ const char *path_basename(const char *path) /* We cut either at the first compression-related hash, * or we cut at the last slash */ const char *ptr = NULL; - char *last_slash = strrchr(path, '/') ? strrchr(path, '/') : strrchr(path, '\\'); + const char *slash = strrchr(path, '/'); + const char *backslash = strrchr(path, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; return ((ptr = path_get_archive_delim(path)) || (ptr = last_slash)) ? (ptr + 1) : path; } @@ -648,7 +687,9 @@ const char *path_basename(const char *path) const char *path_basename_nocompression(const char *path) { /* We cut at the last slash */ - char *last_slash = strrchr(path, '/') ? strrchr(path, '/') : strrchr(path, '\\'); + const char *slash = strrchr(path, '/'); + const char *backslash = strrchr(path, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; return (last_slash) ? (last_slash + 1) : path; } @@ -958,7 +999,9 @@ size_t fill_pathname_join_special(char *s, if (*s) { - char *last_slash = strrchr(s, '/') ? strrchr(s, '/') : strrchr(s, '\\'); + const char *slash = strrchr(s, '/'); + const char *backslash = strrchr(s, '\\'); + char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash) { /* Try to preserve slash type. */ @@ -1284,6 +1327,8 @@ size_t fill_pathname_abbreviated_or_relative(char *s, **/ void path_basedir_wrapper(char *s) { + const char *slash; + const char *backslash; char *last_slash = NULL; if (!s || s[0] == '\0' || s[1] == '\0') return; @@ -1292,7 +1337,9 @@ void path_basedir_wrapper(char *s) if ((last_slash = (char*)path_get_archive_delim(s))) *last_slash = '\0'; #endif - last_slash = strrchr(s, '/') ? strrchr(s, '/') : strrchr(s, '\\'); + slash = strrchr(s, '/'); + backslash = strrchr(s, '\\'); + last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash; if (last_slash) last_slash[1] = '\0'; else diff --git a/libretro-common/file/nbio/nbio_windowsmmap.c b/libretro-common/file/nbio/nbio_windowsmmap.c index 2556f3a246..82b1c342df 100644 --- a/libretro-common/file/nbio/nbio_windowsmmap.c +++ b/libretro-common/file/nbio/nbio_windowsmmap.c @@ -77,25 +77,22 @@ static void *nbio_mmap_win32_open(const char * filename, unsigned mode) #else SIZE_T len; #endif - HANDLE file; struct nbio_mmap_win32_t* handle = NULL; void* ptr = NULL; bool is_write = (mode == NBIO_WRITE || mode == NBIO_UPDATE || mode == BIO_WRITE); DWORD access = (is_write ? GENERIC_READ|GENERIC_WRITE : GENERIC_READ); #if !defined(_WIN32) || defined(LEGACY_WIN32) - file = CreateFile(filename, access, FILE_SHARE_ALL, NULL, dispositions[mode], FILE_ATTRIBUTE_NORMAL, NULL); + HANDLE file = CreateFile(filename, access, FILE_SHARE_ALL, NULL, dispositions[mode], FILE_ATTRIBUTE_NORMAL, NULL); #else wchar_t *filename_wide = utf8_to_utf16_string_alloc(filename); - if (!filename_wide) - return NULL; #ifdef __WINRT__ - file = CreateFile2(filename_wide, access, FILE_SHARE_ALL, dispositions[mode], NULL); + HANDLE file = CreateFile2(filename_wide, access, FILE_SHARE_ALL, dispositions[mode], NULL); #else - file = CreateFileW(filename_wide, access, FILE_SHARE_ALL, NULL, dispositions[mode], FILE_ATTRIBUTE_NORMAL, NULL); + HANDLE file = CreateFileW(filename_wide, access, FILE_SHARE_ALL, NULL, dispositions[mode], FILE_ATTRIBUTE_NORMAL, NULL); #endif - free(filename_wide); - filename_wide = NULL; + if (filename_wide) + free(filename_wide); #endif if (file == INVALID_HANDLE_VALUE) @@ -103,49 +100,19 @@ static void *nbio_mmap_win32_open(const char * filename, unsigned mode) #if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0500 /* GetFileSizeEx is new for Windows 2000 */ - if (!GetFileSizeEx(file, &len)) - { - CloseHandle(file); - return NULL; - } + GetFileSizeEx(file, &len); mem = CreateFileMapping(file, NULL, is_write ? PAGE_READWRITE : PAGE_READONLY, 0, 0, NULL); - if (!mem) - { - CloseHandle(file); - return NULL; - } ptr = MapViewOfFile(mem, is_write ? (FILE_MAP_READ|FILE_MAP_WRITE) : FILE_MAP_READ, 0, 0, len.QuadPart); #else - if (!GetFileSize(file, &len)) - { - CloseHandle(file); - return NULL; - } + GetFileSize(file, &len); mem = CreateFileMapping(file, NULL, is_write ? PAGE_READWRITE : PAGE_READONLY, 0, 0, NULL); - if (!mem) - { - CloseHandle(file); - return NULL; - } ptr = MapViewOfFile(mem, is_write ? (FILE_MAP_READ|FILE_MAP_WRITE) : FILE_MAP_READ, 0, 0, len); #endif + CloseHandle(mem); - if (!ptr) - { - CloseHandle(file); - return NULL; - } - handle = (struct nbio_mmap_win32_t*)malloc(sizeof(struct nbio_mmap_win32_t)); - if (!handle) - { - UnmapViewOfFile(ptr); - CloseHandle(file); - return NULL; - } - handle->file = file; handle->is_write = is_write; #if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0500 diff --git a/libretro-common/gfx/scaler/scaler_filter.c b/libretro-common/gfx/scaler/scaler_filter.c index 2bf01adbb0..5a557d54ea 100644 --- a/libretro-common/gfx/scaler/scaler_filter.c +++ b/libretro-common/gfx/scaler/scaler_filter.c @@ -48,19 +48,16 @@ static INLINE void gen_filter_bilinear_sub(struct scaler_filter *filter, int i; for (i = 0; i < len; i++, pos += step) { - int filter_val; filter->filter_pos[i] = pos >> 16; - filter_val = (pos & 0xffff) >> 2; - filter->filter[i * 2 + 1] = filter_val; - filter->filter[i * 2 + 0] = FILTER_UNITY - filter_val; + filter->filter[i * 2 + 1] = (pos & 0xffff) >> 2; + filter->filter[i * 2 + 0] = FILTER_UNITY - filter->filter[i * 2 + 1]; } } static INLINE void gen_filter_sinc_sub(struct scaler_filter *filter, - size_t len, int pos, int step, double phase_mul) + int len, int pos, int step, double phase_mul) { - int j; - size_t i; + int i, j; const int sinc_size = filter->filter_len; for (i = 0; i < len; i++, pos += step) @@ -87,7 +84,12 @@ static bool validate_filter(struct scaler_ctx *ctx) for (i = 0; i < ctx->out_width; i++) { if (ctx->horiz.filter_pos[i] > max_w_pos || ctx->horiz.filter_pos[i] < 0) + { +#ifndef NDEBUG + fprintf(stderr, "Out X = %d => In X = %d\n", i, ctx->horiz.filter_pos[i]); +#endif return false; + } } max_h_pos = ctx->in_height - ctx->vert.filter_len; @@ -95,7 +97,12 @@ static bool validate_filter(struct scaler_ctx *ctx) for (i = 0; i < ctx->out_height; i++) { if (ctx->vert.filter_pos[i] > max_h_pos || ctx->vert.filter_pos[i] < 0) + { +#ifndef NDEBUG + fprintf(stderr, "Out Y = %d => In Y = %d\n", i, ctx->vert.filter_pos[i]); +#endif return false; + } } return true; @@ -109,14 +116,17 @@ static void fixup_filter_sub(struct scaler_filter *filter, for (i = 0; i < out_len; i++) { - int postsample = filter->filter_pos[i] - max_pos; - int presample = -filter->filter_pos[i]; - int16_t *base_filter = filter->filter + i * filter->filter_stride; + int postsample = filter->filter_pos[i] - max_pos; + int presample = -filter->filter_pos[i]; if (postsample > 0) { + int16_t *base_filter = NULL; + filter->filter_pos[i] -= postsample; + base_filter = filter->filter + i * filter->filter_stride; + if (postsample > (int)filter->filter_len) memset(base_filter, 0, filter->filter_len * sizeof(int16_t)); else @@ -129,7 +139,10 @@ static void fixup_filter_sub(struct scaler_filter *filter, if (presample > 0) { + int16_t *base_filter = NULL; + filter->filter_pos[i] += presample; + base_filter = filter->filter + i * filter->filter_stride; if (presample > (int)filter->filter_len) memset(base_filter, 0, filter->filter_len * sizeof(int16_t)); @@ -185,17 +198,15 @@ bool scaler_gen_filter(struct scaler_ctx *ctx) if (!ctx->horiz.filter || !ctx->vert.filter) return false; - /* Calculate step sizes */ - x_step = (1 << 16) * ctx->in_width / ctx->out_width; + x_step = (1 << 16) * ctx->in_width / ctx->out_width; y_step = (1 << 16) * ctx->in_height / ctx->out_height; - /* Calculate initial positions */ - x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15); - y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15); - /* Generate filters based on scaler type */ switch (ctx->scaler_type) { case SCALER_TYPE_POINT: + x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15); + y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15); + gen_filter_point_sub(&ctx->horiz, ctx->out_width, x_pos, x_step); gen_filter_point_sub(&ctx->vert, ctx->out_height, y_pos, y_step); @@ -203,6 +214,9 @@ bool scaler_gen_filter(struct scaler_ctx *ctx) break; case SCALER_TYPE_BILINEAR: + x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15); + y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15); + gen_filter_bilinear_sub(&ctx->horiz, ctx->out_width, x_pos, x_step); gen_filter_bilinear_sub(&ctx->vert, ctx->out_height, y_pos, y_step); break; @@ -210,8 +224,10 @@ bool scaler_gen_filter(struct scaler_ctx *ctx) case SCALER_TYPE_SINC: /* Need to expand the filter when downsampling * to get a proper low-pass effect. */ - x_pos -= sinc_size << 15; - y_pos -= sinc_size << 15; + + x_pos = (1 << 15) * ctx->in_width / ctx->out_width - (1 << 15) - (sinc_size << 15); + y_pos = (1 << 15) * ctx->in_height / ctx->out_height - (1 << 15) - (sinc_size << 15); + gen_filter_sinc_sub(&ctx->horiz, ctx->out_width, x_pos, x_step, ctx->in_width > ctx->out_width ? (double)ctx->out_width / ctx->in_width : 1.0); gen_filter_sinc_sub(&ctx->vert, ctx->out_height, y_pos, y_step, diff --git a/libretro-common/gfx/scaler/scaler_int.c b/libretro-common/gfx/scaler/scaler_int.c index 1eff9855e3..3c659e16b0 100644 --- a/libretro-common/gfx/scaler/scaler_int.c +++ b/libretro-common/gfx/scaler/scaler_int.c @@ -67,14 +67,14 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid int h, w, y; const uint64_t *input = ctx->scaled.frame; uint32_t *output = (uint32_t*)output_; + const int16_t *filter_vert = ctx->vert.filter; - int stride_div_8 = ctx->scaled.stride >> 3; for (h = 0; h < ctx->out_height; h++, filter_vert += ctx->vert.filter_stride, output += stride >> 2) { const uint64_t *input_base = input + ctx->vert.filter_pos[h] - * stride_div_8; + * (ctx->scaled.stride >> 3); for (w = 0; w < ctx->out_width; w++) { @@ -87,16 +87,16 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid input_base_y += (ctx->scaled.stride >> 2)) { __m128i coeff = _mm_set_epi64x(filter_vert[y + 1] * 0x0001000100010001ll, filter_vert[y + 0] * 0x0001000100010001ll); - __m128i col = _mm_set_epi64x(input_base_y[stride_div_8], input_base_y[0]); + __m128i col = _mm_set_epi64x(input_base_y[ctx->scaled.stride >> 3], input_base_y[0]); res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); } - /* Handle the last odd filter length case */ - if (ctx->vert.filter_len % 2 != 0) + for (; y < ctx->vert.filter_len; y++, input_base_y += (ctx->scaled.stride >> 3)) { __m128i coeff = _mm_set_epi64x(0, filter_vert[y] * 0x0001000100010001ll); __m128i col = _mm_set_epi64x(0, input_base_y[0]); + res = _mm_adds_epi16(_mm_mulhi_epi16(col, coeff), res); } @@ -107,28 +107,39 @@ void scaler_argb8888_vert(const struct scaler_ctx *ctx, void *output_, int strid output[w] = _mm_cvtsi128_si32(final); #else - uint32_t res_a = 0, res_r = 0, res_g = 0, res_b = 0; + int16_t res_a = 0; + int16_t res_r = 0; + int16_t res_g = 0; + int16_t res_b = 0; - for (y = 0; y < ctx->vert.filter_len; y++, input_base_y += stride_div_8) + for (y = 0; y < ctx->vert.filter_len; y++, + input_base_y += (ctx->scaled.stride >> 3)) { - uint64_t col = *input_base_y; + uint64_t col = *input_base_y; - res_a += (uint16_t)((col >> 48) & 0xffff) * filter_vert[y] >> 16; - res_r += (uint16_t)((col >> 32) & 0xffff) * filter_vert[y] >> 16; - res_g += (uint16_t)((col >> 16) & 0xffff) * filter_vert[y] >> 16; - res_b += (uint16_t)((col >> 0) & 0xffff) * filter_vert[y] >> 16; + int16_t a = (col >> 48) & 0xffff; + int16_t r = (col >> 32) & 0xffff; + int16_t g = (col >> 16) & 0xffff; + int16_t b = (col >> 0) & 0xffff; + + int16_t coeff = filter_vert[y]; + + res_a += (a * coeff) >> 16; + res_r += (r * coeff) >> 16; + res_g += (g * coeff) >> 16; + res_b += (b * coeff) >> 16; } - res_a >>= (7 - 2 - 2); - res_r >>= (7 - 2 - 2); - res_g >>= (7 - 2 - 2); - res_b >>= (7 - 2 - 2); + res_a >>= (7 - 2 - 2); + res_r >>= (7 - 2 - 2); + res_g >>= (7 - 2 - 2); + res_b >>= (7 - 2 - 2); - output[w] = - (clamp_8bit(res_a) << 24) - | (clamp_8bit(res_r) << 16) - | (clamp_8bit(res_g) << 8) - | (clamp_8bit(res_b) << 0); + output[w] = + (clamp_8bit(res_a) << 24) | + (clamp_8bit(res_r) << 16) | + (clamp_8bit(res_g) << 8) | + (clamp_8bit(res_b) << 0); #endif } } @@ -211,10 +222,10 @@ void scaler_argb8888_horiz(const struct scaler_ctx *ctx, const void *input_, int } output[w] = ( - (uint64_t)res_a << 48) - | ((uint64_t)res_r << 32) - | ((uint64_t)res_g << 16) - | ((uint64_t)res_b << 0); + (uint64_t)res_a << 48) | + ((uint64_t)res_r << 32) | + ((uint64_t)res_g << 16) | + ((uint64_t)res_b << 0); #endif } } diff --git a/libretro-common/include/file/file_path.h b/libretro-common/include/file/file_path.h index f5636a74d0..708bdb5170 100644 --- a/libretro-common/include/file/file_path.h +++ b/libretro-common/include/file/file_path.h @@ -275,6 +275,7 @@ bool path_is_absolute(const char *path); * Hidden non-leaf function cost: * - calls strlcpy 2x * - calls strrchr + * - calls strlcat * * @return Length of the string copied into @out */ @@ -627,6 +628,7 @@ void path_basedir_wrapper(char *s); * if not already there. * Hidden non-leaf function cost: + * - can call strlcat once if it returns false * - calls strlen **/ size_t fill_pathname_slash(char *s, size_t len); diff --git a/libretro-common/lists/file_list.c b/libretro-common/lists/file_list.c index f225e7f7dc..3f50563e95 100644 --- a/libretro-common/lists/file_list.c +++ b/libretro-common/lists/file_list.c @@ -29,142 +29,152 @@ #include #include -static void initialize_item(struct item_file *item) -{ - item->path = NULL; - item->label = NULL; - item->alt = NULL; - item->type = 0; - item->directory_ptr = 0; - item->entry_idx = 0; - item->userdata = NULL; - item->actiondata = NULL; -} - static bool file_list_deinitialize_internal(file_list_t *list) { size_t i; for (i = 0; i < list->size; i++) { - int j; - char **fields[3]; file_list_free_userdata(list, i); file_list_free_actiondata(list, i); - fields[0] = &list->list[i].path; - fields[1] = &list->list[i].label; - fields[2] = &list->list[i].alt; - for (j = 0; j < 3; j++) - { - if (*fields[j]) - { - free(*fields[j]); - *fields[j] = NULL; - } - } + if (list->list[i].path) + free(list->list[i].path); + list->list[i].path = NULL; + + if (list->list[i].label) + free(list->list[i].label); + list->list[i].label = NULL; + + if (list->list[i].alt) + free(list->list[i].alt); + list->list[i].alt = NULL; } - free(list->list); + if (list->list) + free(list->list); list->list = NULL; return true; } bool file_list_reserve(file_list_t *list, size_t nitems) { - struct item_file *new_data; const size_t item_size = sizeof(struct item_file); + struct item_file *new_data; - if (nitems <= list->capacity || nitems > (size_t)-1 / item_size) + if (nitems < list->capacity || nitems > (size_t)-1/item_size) return false; - new_data = (struct item_file*)realloc(list->list, nitems * item_size); - if (!new_data) + if (!(new_data = (struct item_file*)realloc(list->list, nitems * item_size))) return false; memset(&new_data[list->capacity], 0, item_size * (nitems - list->capacity)); + list->list = new_data; list->capacity = nitems; return true; } -bool file_list_insert(file_list_t *list, const char *path, const char *label, +bool file_list_insert(file_list_t *list, + const char *path, const char *label, unsigned type, size_t directory_ptr, - size_t entry_idx, size_t idx) + size_t entry_idx, + size_t idx) { int i; + /* Expand file list if needed */ if (list->size >= list->capacity) if (!file_list_reserve(list, list->capacity * 2 + 1)) return false; - memmove(&list->list[idx + 1], &list->list[idx], (list->size - idx) * sizeof(struct item_file)); + for (i = (unsigned)list->size; i > (int)idx; i--) + { + struct item_file *copy = (struct item_file*) + malloc(sizeof(struct item_file)); - initialize_item(&list->list[idx]); + if (copy) + { + copy->path = NULL; + copy->label = NULL; + copy->alt = NULL; + copy->type = 0; + copy->directory_ptr = 0; + copy->entry_idx = 0; + copy->userdata = NULL; + copy->actiondata = NULL; + + memcpy(copy, &list->list[i-1], sizeof(struct item_file)); + + memcpy(&list->list[i-1], &list->list[i], sizeof(struct item_file)); + memcpy(&list->list[i], copy, sizeof(struct item_file)); + + free(copy); + } + } + + list->list[idx].path = NULL; + list->list[idx].label = NULL; + list->list[idx].alt = NULL; list->list[idx].type = type; list->list[idx].directory_ptr = directory_ptr; list->list[idx].entry_idx = entry_idx; + list->list[idx].userdata = NULL; + list->list[idx].actiondata = NULL; if (label) - { - char *dup_label = strdup(label); - if (dup_label) - list->list[idx].label = dup_label; - } + list->list[idx].label = strdup(label); if (path) - { - char *dup_path = strdup(path); - if (dup_path) - list->list[idx].path = dup_path; - } + list->list[idx].path = strdup(path); list->size++; + return true; } -bool file_list_append(file_list_t *list, const char *path, const char *label, - unsigned type, size_t directory_ptr, size_t entry_idx) +bool file_list_append(file_list_t *list, + const char *path, const char *label, + unsigned type, size_t directory_ptr, + size_t entry_idx) { unsigned idx = (unsigned)list->size; + /* Expand file list if needed */ if (idx >= list->capacity) if (!file_list_reserve(list, list->capacity * 2 + 1)) return false; - initialize_item(&list->list[idx]); + list->list[idx].path = NULL; + list->list[idx].label = NULL; + list->list[idx].alt = NULL; list->list[idx].type = type; list->list[idx].directory_ptr = directory_ptr; list->list[idx].entry_idx = entry_idx; + list->list[idx].userdata = NULL; + list->list[idx].actiondata = NULL; if (label) - { - char *dup_label = strdup(label); - if (dup_label) - list->list[idx].label = dup_label; - } + list->list[idx].label = strdup(label); if (path) - { - char *dup_path = strdup(path); - if (dup_path) - list->list[idx].path = dup_path; - } + list->list[idx].path = strdup(path); list->size++; + return true; } void file_list_pop(file_list_t *list, size_t *directory_ptr) { - if (!list || list->size == 0) + if (!list) return; - --list->size; - if (list->list[list->size].path) + if (list->size != 0) { - free(list->list[list->size].path); + --list->size; + if (list->list[list->size].path) + free(list->list[list->size].path); list->list[list->size].path = NULL; - } - if (list->list[list->size].label) - { - free(list->list[list->size].label); + + if (list->list[list->size].label) + free(list->list[list->size].label); list->list[list->size].label = NULL; } @@ -287,7 +297,7 @@ void file_list_free_actiondata(const file_list_t *list, size_t idx) if (!list) return; if (list->list[idx].actiondata) - free(list->list[idx].actiondata); + free(list->list[idx].actiondata); list->list[idx].actiondata = NULL; } @@ -296,7 +306,7 @@ void file_list_free_userdata(const file_list_t *list, size_t idx) if (!list) return; if (list->list[idx].userdata) - free(list->list[idx].userdata); + free(list->list[idx].userdata); list->list[idx].userdata = NULL; } @@ -312,8 +322,8 @@ bool file_list_search(const file_list_t *list, const char *needle, size_t *idx) { const char *str = NULL; const char *alt = list->list[i].alt - ? list->list[i].alt - : list->list[i].path; + ? list->list[i].alt + : list->list[i].path; if (!alt) {