Revert "Rollback changes for now until we can get this compiling on MSVC 2005 on"

This reverts commit 08481e2a68.
This commit is contained in:
twinaphex 2021-06-11 12:05:48 +02:00
parent 5d4069cf8f
commit bd9c35d379
1 changed file with 416 additions and 378 deletions

View File

@ -75,16 +75,15 @@ typedef struct rarch_sinc_resampler
float *phase_table; float *phase_table;
float *buffer_l; float *buffer_l;
float *buffer_r; float *buffer_r;
unsigned enable_avx;
unsigned phase_bits; unsigned phase_bits;
unsigned subphase_bits; unsigned subphase_bits;
unsigned subphase_mask; unsigned subphase_mask;
unsigned taps; unsigned taps;
unsigned ptr; unsigned ptr;
unsigned num_channels;
uint32_t time; uint32_t time;
float subphase_mod; float subphase_mod;
float kaiser_beta; float kaiser_beta;
enum sinc_window window_type;
} rarch_sinc_resampler_t; } rarch_sinc_resampler_t;
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON) #if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
@ -154,7 +153,7 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
#endif #endif
#if defined(__AVX__) #if defined(__AVX__)
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data) static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -165,8 +164,6 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
size_t frames = data->input_frames; size_t frames = data->input_frames;
size_t out_frames = 0; size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -235,9 +232,21 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
} }
} }
} }
data->output_frames = out_frames;
} }
else
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -301,14 +310,13 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
} }
} }
} }
}
data->output_frames = out_frames; data->output_frames = out_frames;
} }
#endif #endif
#if defined(__SSE__) #if defined(__SSE__)
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data) static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -319,8 +327,6 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
size_t frames = data->input_frames; size_t frames = data->input_frames;
size_t out_frames = 0; size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -347,7 +353,9 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
while (resamp->time < phases) while (resamp->time < phases)
{ {
unsigned i; unsigned i;
#if 0
__m128 sum; __m128 sum;
#endif
unsigned phase = resamp->time >> resamp->subphase_bits; unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2; float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps; float *delta_table = phase_table + taps;
@ -368,40 +376,55 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
} }
#ifdef HAVE_GRIFFIN
/* Them annoying shuffles. /* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 } * sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 } * sum_r = { r3, r2, r1, r0 }
*/ */
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)), _MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 } * sum = { R1, R0, L1, L0 }
*/ */
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L } * sum = { X, R, X, L }
*/ */
/* Store L */ /* Store L */
_mm_store_ss(output + 0, sum); _mm_store_ss(output++, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */ /* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); _mm_store_ss(output++, _mm_movehl_ps(sum, sum));
#else
#ifdef _MSC_VER
*(output++) = _mm_cvtss_f32(sum_l) + sum_l.m128_f32[1] + sum_l.m128_f32[2] + sum_l.m128_f32[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r.m128_f32[1] + sum_r.m128_f32[2] + sum_r.m128_f32[3];
#else
*(output++) = _mm_cvtss_f32(sum_l) + sum_l[1] + sum_l[2] + sum_l[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r[1] + sum_r[2] + sum_r[3];
#endif
#endif
output += 2;
out_frames++; out_frames++;
resamp->time += ratio; resamp->time += ratio;
} }
} }
} }
data->output_frames = out_frames;
} }
else
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -428,7 +451,9 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
while (resamp->time < phases) while (resamp->time < phases)
{ {
unsigned i; unsigned i;
#if 0
__m128 sum; __m128 sum;
#endif
unsigned phase = resamp->time >> resamp->subphase_bits; unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps; float *phase_table = resamp->phase_table + phase * taps;
@ -444,44 +469,46 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc)); sum_r = _mm_add_ps(sum_r, _mm_mul_ps(buf_r, _sinc));
} }
#if 0
/* Them annoying shuffles. /* Them annoying shuffles.
* sum_l = { l3, l2, l1, l0 } * sum_l = { l3, l2, l1, l0 }
* sum_r = { r3, r2, r1, r0 } * sum_r = { r3, r2, r1, r0 }
*/ */
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r, sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)), _MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2))); _mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 } /* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 } * sum = { R1, R0, L1, L0 }
*/ */
sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum); sum = _mm_add_ps(_mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 3, 1, 1)), sum);
/* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 } /* sum = {R1, R1, L1, L1 } + { R1, R0, L1, L0 }
* sum = { X, R, X, L } * sum = { X, R, X, L }
*/ */
/* Store L */ /* Store L */
_mm_store_ss(output + 0, sum); _mm_store_ss(output++, sum);
/* movehl { X, R, X, L } == { X, R, X, R } */ /* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum)); _mm_store_ss(output++, _mm_movehl_ps(sum, sum));
#else
#ifdef _MSC_VER
*(output++) = _mm_cvtss_f32(sum_l) + sum_l.m128_f32[1] + sum_l.m128_f32[2] + sum_l.m128_f32[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r.m128_f32[1] + sum_r.m128_f32[2] + sum_r.m128_f32[3];
#else
*(output++) = _mm_cvtss_f32(sum_l) + sum_l[1] + sum_l[2] + sum_l[3];
*(output++) = _mm_cvtss_f32(sum_r) + sum_r[1] + sum_r[2] + sum_r[3];
#endif
#endif
output += 2;
out_frames++; out_frames++;
resamp->time += ratio; resamp->time += ratio;
} }
} }
} }
}
data->output_frames = out_frames; data->output_frames = out_frames;
} }
#endif #endif
static void resampler_sinc_process_c(void *re_, struct resampler_data *data) static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_; rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits); unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
@ -492,8 +519,6 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
size_t frames = data->input_frames; size_t frames = data->input_frames;
size_t out_frames = 0; size_t out_frames = 0;
if (resamp->window_type == SINC_WINDOW_KAISER)
{
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -536,19 +561,30 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
sum_r += buffer_r[i] * sinc_val; sum_r += buffer_r[i] * sinc_val;
} }
output[0] = sum_l; *output++ = sum_l;
output[1] = sum_r; *output++ = sum_r;
output += 2;
out_frames++; out_frames++;
resamp->time += ratio; resamp->time += ratio;
} }
} }
} }
data->output_frames = out_frames;
} }
else
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
{ {
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
while (frames) while (frames)
{ {
while (frames && resamp->time >= phases) while (frames && resamp->time >= phases)
@ -588,17 +624,15 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
sum_r += buffer_r[i] * sinc_val; sum_r += buffer_r[i] * sinc_val;
} }
output[0] = sum_l; *output++ = sum_l;
output[1] = sum_r; *output++ = sum_r;
output += 2;
out_frames++; out_frames++;
resamp->time += ratio; resamp->time += ratio;
} }
} }
} }
}
data->output_frames = out_frames; data->output_frames = out_frames;
} }
@ -735,14 +769,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
size_t phase_elems = 0; size_t phase_elems = 0;
size_t elems = 0; size_t elems = 0;
unsigned sidelobes = 0; unsigned sidelobes = 0;
unsigned enable_avx = 0;
enum sinc_window window_type = SINC_WINDOW_NONE;
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*) rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)
calloc(1, sizeof(*re)); calloc(1, sizeof(*re));
if (!re) if (!re)
return NULL; return NULL;
re->window_type = SINC_WINDOW_NONE;
switch (quality) switch (quality)
{ {
case RESAMPLER_QUALITY_LOWEST: case RESAMPLER_QUALITY_LOWEST:
@ -750,34 +784,32 @@ static void *resampler_sinc_new(const struct resampler_config *config,
sidelobes = 2; sidelobes = 2;
re->phase_bits = 12; re->phase_bits = 12;
re->subphase_bits = 10; re->subphase_bits = 10;
re->window_type = SINC_WINDOW_LANCZOS; window_type = SINC_WINDOW_LANCZOS;
re->enable_avx = 0;
break; break;
case RESAMPLER_QUALITY_LOWER: case RESAMPLER_QUALITY_LOWER:
cutoff = 0.98; cutoff = 0.98;
sidelobes = 4; sidelobes = 4;
re->phase_bits = 12; re->phase_bits = 12;
re->subphase_bits = 10; re->subphase_bits = 10;
re->window_type = SINC_WINDOW_LANCZOS; window_type = SINC_WINDOW_LANCZOS;
re->enable_avx = 0;
break; break;
case RESAMPLER_QUALITY_HIGHER: case RESAMPLER_QUALITY_HIGHER:
cutoff = 0.90; cutoff = 0.90;
sidelobes = 32; sidelobes = 32;
re->phase_bits = 10; re->phase_bits = 10;
re->subphase_bits = 14; re->subphase_bits = 14;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 10.5; re->kaiser_beta = 10.5;
re->enable_avx = 1; enable_avx = 1;
window_type = SINC_WINDOW_KAISER;
break; break;
case RESAMPLER_QUALITY_HIGHEST: case RESAMPLER_QUALITY_HIGHEST:
cutoff = 0.962; cutoff = 0.962;
sidelobes = 128; sidelobes = 128;
re->phase_bits = 10; re->phase_bits = 10;
re->subphase_bits = 14; re->subphase_bits = 14;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 14.5; re->kaiser_beta = 14.5;
re->enable_avx = 1; enable_avx = 1;
window_type = SINC_WINDOW_KAISER;
break; break;
case RESAMPLER_QUALITY_NORMAL: case RESAMPLER_QUALITY_NORMAL:
case RESAMPLER_QUALITY_DONTCARE: case RESAMPLER_QUALITY_DONTCARE:
@ -785,14 +817,14 @@ static void *resampler_sinc_new(const struct resampler_config *config,
sidelobes = 8; sidelobes = 8;
re->phase_bits = 8; re->phase_bits = 8;
re->subphase_bits = 16; re->subphase_bits = 16;
re->window_type = SINC_WINDOW_KAISER;
re->kaiser_beta = 5.5; re->kaiser_beta = 5.5;
re->enable_avx = 0; window_type = SINC_WINDOW_KAISER;
break; break;
} }
re->subphase_mask = (1 << re->subphase_bits) - 1; re->subphase_mask = (1 << re->subphase_bits) - 1;
re->subphase_mod = 1.0f / (1 << re->subphase_bits); re->subphase_mod = 1.0f / (1 << re->subphase_bits);
re->num_channels = 2;
re->taps = sidelobes * 2; re->taps = sidelobes * 2;
/* Downsampling, must lower cutoff, and extend number of /* Downsampling, must lower cutoff, and extend number of
@ -805,7 +837,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
/* Be SIMD-friendly. */ /* Be SIMD-friendly. */
#if defined(__AVX__) #if defined(__AVX__)
if (re->enable_avx) if (enable_avx)
re->taps = (re->taps + 7) & ~7; re->taps = (re->taps + 7) & ~7;
else else
#endif #endif
@ -818,7 +850,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
} }
phase_elems = ((1 << re->phase_bits) * re->taps); phase_elems = ((1 << re->phase_bits) * re->taps);
if (re->window_type == SINC_WINDOW_KAISER) if (window_type == SINC_WINDOW_KAISER)
phase_elems = phase_elems * 2; phase_elems = phase_elems * 2;
elems = phase_elems + 4 * re->taps; elems = phase_elems + 4 * re->taps;
@ -832,7 +864,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
re->buffer_l = re->main_buffer + phase_elems; re->buffer_l = re->main_buffer + phase_elems;
re->buffer_r = re->buffer_l + 2 * re->taps; re->buffer_r = re->buffer_l + 2 * re->taps;
switch (re->window_type) switch (window_type)
{ {
case SINC_WINDOW_LANCZOS: case SINC_WINDOW_LANCZOS:
sinc_init_table_lanczos(re, cutoff, re->phase_table, sinc_init_table_lanczos(re, cutoff, re->phase_table,
@ -847,20 +879,26 @@ static void *resampler_sinc_new(const struct resampler_config *config,
} }
sinc_resampler.process = resampler_sinc_process_c; sinc_resampler.process = resampler_sinc_process_c;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_c_kaiser;
if (mask & RESAMPLER_SIMD_AVX && re->enable_avx) if (mask & RESAMPLER_SIMD_AVX && enable_avx)
{ {
#if defined(__AVX__) #if defined(__AVX__)
sinc_resampler.process = resampler_sinc_process_avx; sinc_resampler.process = resampler_sinc_process_avx;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_avx_kaiser;
#endif #endif
} }
else if (mask & RESAMPLER_SIMD_SSE) else if (mask & RESAMPLER_SIMD_SSE)
{ {
#if defined(__SSE__) #if defined(__SSE__)
sinc_resampler.process = resampler_sinc_process_sse; sinc_resampler.process = resampler_sinc_process_sse;
if (window_type == SINC_WINDOW_KAISER)
sinc_resampler.process = resampler_sinc_process_sse_kaiser;
#endif #endif
} }
else if (mask & RESAMPLER_SIMD_NEON && re->window_type != SINC_WINDOW_KAISER) else if (mask & RESAMPLER_SIMD_NEON && window_type != SINC_WINDOW_KAISER)
{ {
#if defined(WANT_NEON) #if defined(WANT_NEON)
sinc_resampler.process = resampler_sinc_process_neon; sinc_resampler.process = resampler_sinc_process_neon;