diff --git a/libretro-common/audio/resampler/drivers/sinc_resampler.c b/libretro-common/audio/resampler/drivers/sinc_resampler.c index 226d8aeb8b..50f7138c55 100644 --- a/libretro-common/audio/resampler/drivers/sinc_resampler.c +++ b/libretro-common/audio/resampler/drivers/sinc_resampler.c @@ -111,6 +111,7 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data size_t frames = data->input_frames; size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { while (frames && resamp->time >= phases) @@ -135,11 +136,11 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data const float *buffer_r = resamp->buffer_r + resamp->ptr; while (resamp->time < phases) { + int i; unsigned phase = resamp->time >> resamp->subphase_bits; const float *phase_table = resamp->phase_table + phase * taps * 2; const float *delta_table = phase_table + taps; float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod); - int i; float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0}; float32x2_t p3, p4; @@ -258,72 +259,69 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data * size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + __m256 res_l, res_r; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + __m256 delta = _mm256_set1_ps((float) + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); + __m256 sum_l = _mm256_setzero_ps(); + __m256 sum_r = _mm256_setzero_ps(); - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; - - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i += 8) { - int i; - unsigned phase = resamp->time >> resamp->subphase_bits; + __m256 buf_l = _mm256_loadu_ps(buffer_l + i); + __m256 buf_r = _mm256_loadu_ps(buffer_r + i); + __m256 deltas = _mm256_load_ps(delta_table + i); + __m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i), + _mm256_mul_ps(deltas, delta)); - float *phase_table = resamp->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - __m256 delta = _mm256_set1_ps((float) - (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); - - __m256 sum_l = _mm256_setzero_ps(); - __m256 sum_r = _mm256_setzero_ps(); - - for (i = 0; i < (int)taps; i += 8) - { - __m256 buf_l = _mm256_loadu_ps(buffer_l + i); - __m256 buf_r = _mm256_loadu_ps(buffer_r + i); - __m256 deltas = _mm256_load_ps(delta_table + i); - __m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i), - _mm256_mul_ps(deltas, delta)); - - sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); - sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); - } - - /* hadd on AVX is weird, and acts on low-lanes - * and high-lanes separately. */ - __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); - __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); - res_l = _mm256_hadd_ps(res_l, res_l); - res_r = _mm256_hadd_ps(res_r, res_r); - res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); - res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); - - /* This is optimized to mov %xmmN, [mem]. - * There doesn't seem to be any _mm256_store_ss intrinsic. */ - _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); - _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); - - output += 2; - out_frames++; - resamp->time += ratio; + sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); + sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); } + + /* hadd on AVX is weird, and acts on low-lanes + * and high-lanes separately. */ + res_l = _mm256_hadd_ps(sum_l, sum_l); + res_r = _mm256_hadd_ps(sum_r, sum_r); + res_l = _mm256_hadd_ps(res_l, res_l); + res_r = _mm256_hadd_ps(res_r, res_r); + res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); + res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); + + /* This is optimized to mov %xmmN, [mem]. + * There doesn't seem to be any _mm256_store_ss intrinsic. */ + _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); + _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + + output += 2; + out_frames++; + resamp->time += ratio; } } } @@ -334,76 +332,74 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data * static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) { rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; - unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); + unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); - uint32_t ratio = phases / data->ratio; - const float *input = data->data_in; - float *output = data->data_out; - size_t frames = data->input_frames; - size_t out_frames = 0; - unsigned taps = resamp->taps; + uint32_t ratio = phases / data->ratio; + const float *input = data->data_in; + float *output = data->data_out; + size_t frames = data->input_frames; + size_t out_frames = 0; + unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + __m256 delta, res_l, res_r; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; + __m256 sum_l = _mm256_setzero_ps(); + __m256 sum_r = _mm256_setzero_ps(); - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i += 8) { - int i; - __m256 delta; - unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = resamp->phase_table + phase * taps; + __m256 buf_l = _mm256_loadu_ps(buffer_l + i); + __m256 buf_r = _mm256_loadu_ps(buffer_r + i); + __m256 sinc = _mm256_load_ps((const float*)phase_table + i); - __m256 sum_l = _mm256_setzero_ps(); - __m256 sum_r = _mm256_setzero_ps(); - - for (i = 0; i < (int)taps; i += 8) - { - __m256 buf_l = _mm256_loadu_ps(buffer_l + i); - __m256 buf_r = _mm256_loadu_ps(buffer_r + i); - __m256 sinc = _mm256_load_ps((const float*)phase_table + i); - - sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); - sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); - } - - /* hadd on AVX is weird, and acts on low-lanes - * and high-lanes separately. */ - __m256 res_l = _mm256_hadd_ps(sum_l, sum_l); - __m256 res_r = _mm256_hadd_ps(sum_r, sum_r); - res_l = _mm256_hadd_ps(res_l, res_l); - res_r = _mm256_hadd_ps(res_r, res_r); - res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); - res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); - - /* This is optimized to mov %xmmN, [mem]. - * There doesn't seem to be any _mm256_store_ss intrinsic. */ - _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); - _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); - - output += 2; - out_frames++; - resamp->time += ratio; + sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc)); + sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc)); } + + /* hadd on AVX is weird, and acts on low-lanes + * and high-lanes separately. */ + res_l = _mm256_hadd_ps(sum_l, sum_l); + res_r = _mm256_hadd_ps(sum_r, sum_r); + res_l = _mm256_hadd_ps(res_l, res_l); + res_r = _mm256_hadd_ps(res_r, res_r); + res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l); + res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r); + + /* This is optimized to mov %xmmN, [mem]. + * There doesn't seem to be any _mm256_store_ss intrinsic. */ + _mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0)); + _mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0)); + + output += 2; + out_frames++; + resamp->time += ratio; } } } @@ -425,82 +421,80 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data * size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + __m128 sum; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + __m128 delta = _mm_set1_ps((float) + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; + __m128 sum_l = _mm_setzero_ps(); + __m128 sum_r = _mm_setzero_ps(); - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i += 4) { - int i; - __m128 sum; - unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = resamp->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - __m128 delta = _mm_set1_ps((float) - (resamp->time & resamp->subphase_mask) * resamp->subphase_mod); - - __m128 sum_l = _mm_setzero_ps(); - __m128 sum_r = _mm_setzero_ps(); - - for (i = 0; i < (int)taps; i += 4) - { - __m128 buf_l = _mm_loadu_ps(buffer_l + i); - __m128 buf_r = _mm_loadu_ps(buffer_r + i); - __m128 deltas = _mm_load_ps(delta_table + i); - __m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i), - _mm_mul_ps(deltas, delta)); - sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); - sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); - } - - /* Them annoying shuffles. - * sum_l = { l3, l2, l1, l0 } - * sum_r = { r3, r2, r1, r0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, - _MM_SHUFFLE(1, 0, 1, 0)), - _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); - - /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } - * sum = { R1, R0, L1, L0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); - - /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } - * sum = { X, R, X, L } - */ - - /* Store L */ - _mm_store_ss(output + 0, sum); - - /* movehl { X, R, X, L } == { X, R, X, R } */ - _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); - - output += 2; - out_frames++; - resamp->time += ratio; + __m128 buf_l = _mm_loadu_ps(buffer_l + i); + __m128 buf_r = _mm_loadu_ps(buffer_r + i); + __m128 deltas = _mm_load_ps(delta_table + i); + __m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i), + _mm_mul_ps(deltas, delta)); + sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); + sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); } + + /* Them annoying shuffles. + * sum_l = { l3, l2, l1, l0 } + * sum_r = { r3, r2, r1, r0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, + _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + + /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } + * sum = { R1, R0, L1, L0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); + + /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } + * sum = { X, R, X, L } + */ + + /* Store L */ + _mm_store_ss(output + 0, sum); + + /* movehl { X, R, X, L } == { X, R, X, R } */ + _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + + output += 2; + out_frames++; + resamp->time += ratio; } } } @@ -520,77 +514,75 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + __m128 sum; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; + __m128 sum_l = _mm_setzero_ps(); + __m128 sum_r = _mm_setzero_ps(); - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i += 4) { - int i; - __m128 sum; - unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = resamp->phase_table + phase * taps; - - __m128 sum_l = _mm_setzero_ps(); - __m128 sum_r = _mm_setzero_ps(); - - for (i = 0; i < (int)taps; i += 4) - { - __m128 buf_l = _mm_loadu_ps(buffer_l + i); - __m128 buf_r = _mm_loadu_ps(buffer_r + i); - __m128 _sinc = _mm_load_ps((const float*)phase_table + i); - sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); - sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); - } - - /* Them annoying shuffles. - * sum_l = { l3, l2, l1, l0 } - * sum_r = { r3, r2, r1, r0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, - _MM_SHUFFLE(1, 0, 1, 0)), - _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); - - /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } - * sum = { R1, R0, L1, L0 } - */ - - sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); - - /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } - * sum = { X, R, X, L } - */ - - /* Store L */ - _mm_store_ss(output + 0, sum); - - /* movehl { X, R, X, L } == { X, R, X, R } */ - _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); - - output += 2; - out_frames++; - resamp->time += ratio; + __m128 buf_l = _mm_loadu_ps(buffer_l + i); + __m128 buf_r = _mm_loadu_ps(buffer_r + i); + __m128 _sinc = _mm_load_ps((const float*)phase_table + i); + sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc)); + sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); } + + /* Them annoying shuffles. + * sum_l = { l3, l2, l1, l0 } + * sum_r = { r3, r2, r1, r0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, + _MM_SHUFFLE(1, 0, 1, 0)), + _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); + + /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } + * sum = { R1, R0, L1, L0 } + */ + + sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); + + /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } + * sum = { X, R, X, L } + */ + + /* Store L */ + _mm_store_ss(output + 0, sum); + + /* movehl { X, R, X, L } == { X, R, X, R } */ + _mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); + + output += 2; + out_frames++; + resamp->time += ratio; } } } @@ -611,57 +603,54 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + float sum_l = 0.0f; + float sum_r = 0.0f; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps * 2; + float *delta_table = phase_table + taps; + float delta = (float) + (resamp->time & resamp->subphase_mask) * resamp->subphase_mod; - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; - - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i++) { - int i; - float sum_l = 0.0f; - float sum_r = 0.0f; - unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = resamp->phase_table + phase * taps * 2; - float *delta_table = phase_table + taps; - float delta = (float) - (resamp->time & resamp->subphase_mask) * resamp->subphase_mod; + float sinc_val = phase_table[i] + delta_table[i] * delta; - for (i = 0; i < (int)taps; i++) - { - float sinc_val = phase_table[i] + delta_table[i] * delta; - - sum_l += buffer_l[i] * sinc_val; - sum_r += buffer_r[i] * sinc_val; - } - - output[0] = sum_l; - output[1] = sum_r; - - output += 2; - out_frames++; - resamp->time += ratio; + sum_l += buffer_l[i] * sinc_val; + sum_r += buffer_r[i] * sinc_val; } - } + output[0] = sum_l; + output[1] = sum_r; + + output += 2; + out_frames++; + resamp->time += ratio; + } } } @@ -680,54 +669,51 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data) size_t out_frames = 0; unsigned taps = resamp->taps; + while (frames) { - while (frames) + while (frames && resamp->time >= phases) { - while (frames && resamp->time >= phases) + /* Push in reverse to make filter more obvious. */ + if (!resamp->ptr) + resamp->ptr = taps; + resamp->ptr--; + + resamp->buffer_l[resamp->ptr + taps] = + resamp->buffer_l[resamp->ptr] = *input++; + + resamp->buffer_r[resamp->ptr + taps] = + resamp->buffer_r[resamp->ptr] = *input++; + + resamp->time -= phases; + frames--; + } + + { + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + while (resamp->time < phases) { - /* Push in reverse to make filter more obvious. */ - if (!resamp->ptr) - resamp->ptr = taps; - resamp->ptr--; + int i; + float sum_l = 0.0f; + float sum_r = 0.0f; + unsigned phase = resamp->time >> resamp->subphase_bits; + float *phase_table = resamp->phase_table + phase * taps; - resamp->buffer_l[resamp->ptr + taps] = - resamp->buffer_l[resamp->ptr] = *input++; - - resamp->buffer_r[resamp->ptr + taps] = - resamp->buffer_r[resamp->ptr] = *input++; - - resamp->time -= phases; - frames--; - } - - { - const float *buffer_l = resamp->buffer_l + resamp->ptr; - const float *buffer_r = resamp->buffer_r + resamp->ptr; - while (resamp->time < phases) + for (i = 0; i < (int)taps; i++) { - int i; - float sum_l = 0.0f; - float sum_r = 0.0f; - unsigned phase = resamp->time >> resamp->subphase_bits; - float *phase_table = resamp->phase_table + phase * taps; + float sinc_val = phase_table[i]; - for (i = 0; i < (int)taps; i++) - { - float sinc_val = phase_table[i]; - - sum_l += buffer_l[i] * sinc_val; - sum_r += buffer_r[i] * sinc_val; - } - - output[0] = sum_l; - output[1] = sum_r; - - output += 2; - out_frames++; - resamp->time += ratio; + sum_l += buffer_l[i] * sinc_val; + sum_r += buffer_r[i] * sinc_val; } - } + output[0] = sum_l; + output[1] = sum_r; + + output += 2; + out_frames++; + resamp->time += ratio; + } } } @@ -748,10 +734,10 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp, { int i, j; /* Kaiser window function - need to normalize w(0) to 1.0f */ - float kaiser_beta = resamp->kaiser_beta; - double window_mod = besseli0(kaiser_beta); - int stride = calculate_delta ? 2 : 1; - double sidelobes = taps / 2.0; + float kaiser_beta = resamp->kaiser_beta; + double window_mod = besseli0(kaiser_beta); + int stride = calculate_delta ? 2 : 1; + double sidelobes = taps / 2.0; for (i = 0; i < phases; i++) { @@ -774,7 +760,6 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp, { int phase; int p; - for (p = 0; p < phases - 1; p++) { for (j = 0; j < taps; j++) @@ -810,9 +795,9 @@ static void sinc_init_table_lanczos( { int i, j; /* Lanczos window function - need to normalize w(0) to 1.0f */ - double window_mod = 1.0; - int stride = calculate_delta ? 2 : 1; - double sidelobes = taps / 2.0; + double window_mod = 1.0; + int stride = calculate_delta ? 2 : 1; + double sidelobes = taps / 2.0; for (i = 0; i < phases; i++) { @@ -832,8 +817,8 @@ static void sinc_init_table_lanczos( if (calculate_delta) { - int phase; int p; + int phase; for (p = 0; p < phases - 1; p++) { @@ -939,21 +924,21 @@ static void *resampler_sinc_new(const struct resampler_config *config, /* Be SIMD-friendly. */ #if defined(__AVX__) if (enable_avx) - re->taps = (re->taps + 7) & ~7; + re->taps = (re->taps + 7) & ~7; else #endif { #if (defined(__ARM_NEON__) || defined(HAVE_NEON)) - re->taps = (re->taps + 7) & ~7; + re->taps = (re->taps + 7) & ~7; #else - re->taps = (re->taps + 3) & ~3; + re->taps = (re->taps + 3) & ~3; #endif } - phase_elems = ((1 << re->phase_bits) * re->taps); + phase_elems = ((1 << re->phase_bits) * re->taps); if (window_type == SINC_WINDOW_KAISER) phase_elems = phase_elems * 2; - elems = phase_elems + 4 * re->taps; + elems = phase_elems + 4 * re->taps; re->main_buffer = (float*)memalign_alloc(128, sizeof(float) * elems); if (!re->main_buffer)