(sinc resampler) style nits

This commit is contained in:
libretroadmin 2025-07-20 09:37:15 +02:00
parent 99fe15be46
commit dc9ffcce66
1 changed files with 335 additions and 350 deletions

View File

@ -111,6 +111,7 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && resamp->time >= phases)
@ -135,11 +136,11 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps * 2;
const float *delta_table = phase_table + taps;
float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
int i;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
@ -258,72 +259,69 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
__m256 res_l, res_r;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m256 delta = _mm256_set1_ps((float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i += 8)
{
int i;
unsigned phase = resamp->time >> resamp->subphase_bits;
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
__m256 deltas = _mm256_load_ps(delta_table + i);
__m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
_mm256_mul_ps(deltas, delta));
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m256 delta = _mm256_set1_ps((float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
for (i = 0; i < (int)taps; i += 8)
{
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
__m256 deltas = _mm256_load_ps(delta_table + i);
__m256 sinc = _mm256_add_ps(_mm256_load_ps((const float*)phase_table + i),
_mm256_mul_ps(deltas, delta));
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
}
/* hadd on AVX is weird, and acts on low-lanes
* and high-lanes separately. */
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
res_l = _mm256_hadd_ps(res_l, res_l);
res_r = _mm256_hadd_ps(res_r, res_r);
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
/* This is optimized to mov %xmmN, [mem].
* There doesn't seem to be any _mm256_store_ss intrinsic. */
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
resamp->time += ratio;
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
}
/* hadd on AVX is weird, and acts on low-lanes
* and high-lanes separately. */
res_l = _mm256_hadd_ps(sum_l, sum_l);
res_r = _mm256_hadd_ps(sum_r, sum_r);
res_l = _mm256_hadd_ps(res_l, res_l);
res_r = _mm256_hadd_ps(res_r, res_r);
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
/* This is optimized to mov %xmmN, [mem].
* There doesn't seem to be any _mm256_store_ss intrinsic. */
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -334,76 +332,74 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
__m256 delta, res_l, res_r;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i += 8)
{
int i;
__m256 delta;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
__m256 sinc = _mm256_load_ps((const float*)phase_table + i);
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
for (i = 0; i < (int)taps; i += 8)
{
__m256 buf_l = _mm256_loadu_ps(buffer_l + i);
__m256 buf_r = _mm256_loadu_ps(buffer_r + i);
__m256 sinc = _mm256_load_ps((const float*)phase_table + i);
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
}
/* hadd on AVX is weird, and acts on low-lanes
* and high-lanes separately. */
__m256 res_l = _mm256_hadd_ps(sum_l, sum_l);
__m256 res_r = _mm256_hadd_ps(sum_r, sum_r);
res_l = _mm256_hadd_ps(res_l, res_l);
res_r = _mm256_hadd_ps(res_r, res_r);
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
/* This is optimized to mov %xmmN, [mem].
* There doesn't seem to be any _mm256_store_ss intrinsic. */
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
resamp->time += ratio;
sum_l = _mm256_add_ps(sum_l, _mm256_mul_ps(buf_l, sinc));
sum_r = _mm256_add_ps(sum_r, _mm256_mul_ps(buf_r, sinc));
}
/* hadd on AVX is weird, and acts on low-lanes
* and high-lanes separately. */
res_l = _mm256_hadd_ps(sum_l, sum_l);
res_r = _mm256_hadd_ps(sum_r, sum_r);
res_l = _mm256_hadd_ps(res_l, res_l);
res_r = _mm256_hadd_ps(res_r, res_r);
res_l = _mm256_add_ps(_mm256_permute2f128_ps(res_l, res_l, 1), res_l);
res_r = _mm256_add_ps(_mm256_permute2f128_ps(res_r, res_r, 1), res_r);
/* This is optimized to mov %xmmN, [mem].
* There doesn't seem to be any _mm256_store_ss intrinsic. */
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -425,82 +421,80 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m128 delta = _mm_set1_ps((float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i += 4)
{
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m128 delta = _mm_set1_ps((float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
for (i = 0; i < (int)taps; i += 4)
{
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
__m128 deltas = _mm_load_ps(delta_table + i);
__m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
_mm_mul_ps(deltas, delta));
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
__m128 deltas = _mm_load_ps(delta_table + i);
__m128 _sinc = _mm_add_ps(_mm_load_ps((const float*)phase_table + i),
_mm_mul_ps(deltas, delta));
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -520,77 +514,75 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i += 4)
{
int i;
__m128 sum;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
for (i = 0; i < (int)taps; i += 4)
{
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
__m128 _sinc = _mm_load_ps((const float*)phase_table + i);
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
__m128 buf_l = _mm_loadu_ps(buffer_l + i);
__m128 buf_r = _mm_loadu_ps(buffer_r + i);
__m128 _sinc = _mm_load_ps((const float*)phase_table + i);
sum_l = _mm_add_ps(sum_l, _mm_mul_ps(buf_l, _sinc));
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
}
/* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L }
*/
/* Store L */
_mm_store_ss(output + 0, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -611,57 +603,54 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
float delta = (float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i++)
{
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
float delta = (float)
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
float sinc_val = phase_table[i] + delta_table[i] * delta;
for (i = 0; i < (int)taps; i++)
{
float sinc_val = phase_table[i] + delta_table[i] * delta;
sum_l += buffer_l[i] * sinc_val;
sum_r += buffer_r[i] * sinc_val;
}
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
sum_l += buffer_l[i] * sinc_val;
sum_r += buffer_r[i] * sinc_val;
}
}
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -680,54 +669,51 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
size_t out_frames = 0;
unsigned taps = resamp->taps;
while (frames)
{
while (frames)
while (frames && resamp->time >= phases)
{
while (frames && resamp->time >= phases)
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
/* Push in reverse to make filter more obvious. */
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
for (i = 0; i < (int)taps; i++)
{
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
float sinc_val = phase_table[i];
for (i = 0; i < (int)taps; i++)
{
float sinc_val = phase_table[i];
sum_l += buffer_l[i] * sinc_val;
sum_r += buffer_r[i] * sinc_val;
}
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
sum_l += buffer_l[i] * sinc_val;
sum_r += buffer_r[i] * sinc_val;
}
}
output[0] = sum_l;
output[1] = sum_r;
output += 2;
out_frames++;
resamp->time += ratio;
}
}
}
@ -748,10 +734,10 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
{
int i, j;
/* Kaiser window function - need to normalize w(0) to 1.0f */
float kaiser_beta = resamp->kaiser_beta;
double window_mod = besseli0(kaiser_beta);
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
float kaiser_beta = resamp->kaiser_beta;
double window_mod = besseli0(kaiser_beta);
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
for (i = 0; i < phases; i++)
{
@ -774,7 +760,6 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
{
int phase;
int p;
for (p = 0; p < phases - 1; p++)
{
for (j = 0; j < taps; j++)
@ -810,9 +795,9 @@ static void sinc_init_table_lanczos(
{
int i, j;
/* Lanczos window function - need to normalize w(0) to 1.0f */
double window_mod = 1.0;
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
double window_mod = 1.0;
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
for (i = 0; i < phases; i++)
{
@ -832,8 +817,8 @@ static void sinc_init_table_lanczos(
if (calculate_delta)
{
int phase;
int p;
int phase;
for (p = 0; p < phases - 1; p++)
{
@ -939,21 +924,21 @@ static void *resampler_sinc_new(const struct resampler_config *config,
/* Be SIMD-friendly. */
#if defined(__AVX__)
if (enable_avx)
re->taps = (re->taps + 7) & ~7;
re->taps = (re->taps + 7) & ~7;
else
#endif
{
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
re->taps = (re->taps + 7) & ~7;
re->taps = (re->taps + 7) & ~7;
#else
re->taps = (re->taps + 3) & ~3;
re->taps = (re->taps + 3) & ~3;
#endif
}
phase_elems = ((1 << re->phase_bits) * re->taps);
phase_elems = ((1 << re->phase_bits) * re->taps);
if (window_type == SINC_WINDOW_KAISER)
phase_elems = phase_elems * 2;
elems = phase_elems + 4 * re->taps;
elems = phase_elems + 4 * re->taps;
re->main_buffer = (float*)memalign_alloc(128, sizeof(float) * elems);
if (!re->main_buffer)