diff --git a/audio/hermite.c b/audio/hermite.c index 1dc80e3a6f..a74e66d76a 100644 --- a/audio/hermite.c +++ b/audio/hermite.c @@ -18,13 +18,20 @@ #include "resampler.h" #include #include +#include #include "../boolean.h" -#include "../general.h" #ifdef HAVE_CONFIG_H #include "../config.h" #endif +#ifndef RESAMPLER_TEST +#include "../general.h" +#else +#define RARCH_LOG(...) fprintf(stderr, __VA_ARGS__) +#define RARCH_WARN(...) fprintf(stderr, __VA_ARGS__) +#endif + #define CHANNELS 2 typedef struct rarch_hermite_resampler @@ -51,8 +58,11 @@ static inline float hermite_kernel(float mu1, float a, float b, float c, float d return (a0 * b) + (a1 * m0) + (a2 * m1) + (a3 * c); } -void *resampler_hermite_new(void) +void *resampler_hermite_new(double bandwidth_mod) { + if (bandwidth_mod < 1.0) + RARCH_WARN("Hermite resampler is likely to sound absolutely terrible when downsampling.\n"); + #ifndef RESAMPLER_TEST RARCH_LOG("Hermite resampler [C]\n"); #endif diff --git a/audio/resampler.c b/audio/resampler.c index 8ee2f3e36a..644c23882a 100644 --- a/audio/resampler.c +++ b/audio/resampler.c @@ -29,7 +29,7 @@ static const rarch_resampler_t *backends[] = { &hermite_resampler, }; -bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident) +bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident, double bw_ratio) { if (*re && *backend) (*backend)->free(*re); @@ -54,7 +54,7 @@ bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const if (!*backend) return false; - *re = (*backend)->init(); + *re = (*backend)->init(bw_ratio); if (!*re) { *backend = NULL; diff --git a/audio/resampler.h b/audio/resampler.h index a865f2ab5a..981c44f541 100644 --- a/audio/resampler.h +++ b/audio/resampler.h @@ -46,7 +46,7 @@ struct resampler_data typedef struct rarch_resampler { - void *(*init)(void); + void *(*init)(double bandwidth_mod); // Bandwidth factor. Will be < 1.0 for downsampling, > 1.0 for upsamling. Corresponds to expected resampling ratio. void (*process)(void *re, struct resampler_data *data); void (*free)(void *re); const char *ident; @@ -57,7 +57,7 @@ extern const rarch_resampler_t sinc_resampler; // Reallocs resampler. Will free previous handle before allocating a new one. // If ident is NULL, first resampler will be used. -bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident); +bool rarch_resampler_realloc(void **re, const rarch_resampler_t **backend, const char *ident, double bw_ratio); // Convenience macros. // freep makes sure to set handles to NULL to avoid double-free in rarch_resampler_realloc. diff --git a/audio/sinc.c b/audio/sinc.c index b0e911762c..1200c4b0a1 100644 --- a/audio/sinc.c +++ b/audio/sinc.c @@ -45,7 +45,7 @@ #if defined(SINC_LOWEST_QUALITY) #define SINC_WINDOW_LANCZOS #define CUTOFF 0.98 -#define PHASE_BITS 11 +#define PHASE_BITS 12 #define SINC_COEFF_LERP 0 #define SUBPHASE_BITS 10 #define SIDELOBES 2 @@ -104,18 +104,18 @@ typedef struct rarch_sinc_resampler { -#if SINC_COEFF_LERP - sample_t phase_table[1 << PHASE_BITS][TAPS * 2]; -#else - sample_t phase_table[1 << PHASE_BITS][TAPS]; -#endif - sample_t buffer_l[2 * TAPS]; - sample_t buffer_r[2 * TAPS]; + sample_t *phase_table; + sample_t *buffer_l; + sample_t *buffer_r; unsigned taps; unsigned ptr; uint32_t time; + + // A buffer for phase_table, buffer_l and buffer_r are created in a single calloc(). + // Ensure that we get as good cache locality as we can hope for. + sample_t *main_buffer; } rarch_sinc_resampler_t; static inline double sinc(double val) @@ -173,6 +173,7 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff, double window_mod = window_function(0.0); // Need to normalize w(0) to 1.0. int stride = calculate_delta ? 2 : 1; + double sidelobes = taps / 2.0; for (int i = 0; i < phases; i++) { for (int j = 0; j < taps; j++) @@ -180,7 +181,7 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff, int n = j * phases + i; double window_phase = (double)n / (phases * taps); // [0, 1). window_phase = 2.0 * window_phase - 1.0; // [-1, 1) - double sinc_phase = SIDELOBES * window_phase; + double sinc_phase = sidelobes * window_phase; float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * window_function(window_phase) / window_mod; phase_table[i * stride * taps + j] = val; @@ -189,26 +190,26 @@ static void init_sinc_table(rarch_sinc_resampler_t *resamp, double cutoff, if (calculate_delta) { - for (int i = 0; i < phases - 1; i++) + for (int p = 0; p < phases - 1; p++) { for (int j = 0; j < taps; j++) { - float delta = phase_table[(i + 1) * stride * taps + j] - phase_table[i * stride * taps + j]; - phase_table[(i * stride + 1) * taps + j] = delta; + float delta = phase_table[(p + 1) * stride * taps + j] - phase_table[p * stride * taps + j]; + phase_table[(p * stride + 1) * taps + j] = delta; } } - int i = phases - 1; + int phase = phases - 1; for (int j = 0; j < taps; j++) { - int n = j * phases + (i + 1); - double window_phase = (double)n / (phases * taps); // [0, 1). - window_phase = 2.0 * window_phase - 1.0; // [-1, 1) - double sinc_phase = SIDELOBES * window_phase; + int n = j * phases + (phase + 1); + double window_phase = (double)n / (phases * taps); // (0, 1]. + window_phase = 2.0 * window_phase - 1.0; // (-1, 1] + double sinc_phase = sidelobes * window_phase; float val = cutoff * sinc(M_PI * sinc_phase * cutoff) * window_function(window_phase) / window_mod; - float delta = (val - phase_table[i * stride * taps + j]); - phase_table[(i * stride + 1) * taps + j] = delta; + float delta = (val - phase_table[phase * stride * taps + j]); + phase_table[(phase * stride + 1) * taps + j] = delta; } } } @@ -240,14 +241,16 @@ static inline void process_sinc_C(rarch_sinc_resampler_t *resamp, float *out_buf const float *buffer_l = resamp->buffer_l + resamp->ptr; const float *buffer_r = resamp->buffer_r + resamp->ptr; + unsigned taps = resamp->taps; unsigned phase = resamp->time >> SUBPHASE_BITS; - const float *phase_table = resamp->phase_table[phase]; #if SINC_COEFF_LERP - const float *delta_table = phase_table + TAPS; + const float *phase_table = resamp->phase_table + phase * taps * 2; + const float *delta_table = phase_table + taps; float delta = (float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD; +#else + const float *phase_table = resamp->phase_table + phase * taps; #endif - unsigned taps = resamp->taps; for (unsigned i = 0; i < taps; i++) { #if SINC_COEFF_LERP @@ -273,14 +276,16 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer) const float *buffer_l = resamp->buffer_l + resamp->ptr; const float *buffer_r = resamp->buffer_r + resamp->ptr; + unsigned taps = resamp->taps; unsigned phase = resamp->time >> SUBPHASE_BITS; - const float *phase_table = resamp->phase_table[phase]; #if SINC_COEFF_LERP - const float *delta_table = phase_table + TAPS; + const float *phase_table = resamp->phase_table + phase * taps * 2; + const float *delta_table = phase_table + taps; __m256 delta = _mm256_set1_ps((float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD); +#else + const float *phase_table = resamp->phase_table + phase * taps; #endif - unsigned taps = resamp->taps; for (unsigned i = 0; i < taps; i += 8) { __m256 buf_l = _mm256_loadu_ps(buffer_l + i); @@ -321,10 +326,12 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer) unsigned taps = resamp->taps; unsigned phase = resamp->time >> SUBPHASE_BITS; - const float *phase_table = resamp->phase_table[phase]; #if SINC_COEFF_LERP + const float *phase_table = resamp->phase_table + phase * taps * 2; const float *delta_table = phase_table + taps; __m128 delta = _mm_set1_ps((float)(resamp->time & SUBPHASE_MASK) * SUBPHASE_MOD); +#else + const float *phase_table = resamp->phase_table + phase * taps; #endif for (unsigned i = 0; i < taps; i += 4) @@ -365,10 +372,6 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer) } #elif defined(HAVE_NEON) -#if TAPS < 8 -#error "NEON asm requires at least 8 taps (for now)." -#endif - #if SINC_COEFF_LERP #error "NEON asm does not support SINC lerp." #endif @@ -398,9 +401,6 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data) { rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_; - // If data->ratio is < 1, we are downsampling. - // The sinc table is not set up for this, as it always assumes upsampling. - // Downsampling will work, but with some added noise due to aliasing might be present. uint32_t ratio = PHASES / data->ratio; const sample_t *input = data->data_in; @@ -412,9 +412,13 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data) { while (frames && re->time >= PHASES) { - re->ptr = (re->ptr - 1) & (TAPS - 1); - re->buffer_l[re->ptr + TAPS] = re->buffer_l[re->ptr] = *input++; - re->buffer_r[re->ptr + TAPS] = re->buffer_r[re->ptr] = *input++; + // Push in reverse to make filter more obvious. + if (!re->ptr) + re->ptr = re->taps; + re->ptr--; + + re->buffer_l[re->ptr + re->taps] = re->buffer_l[re->ptr] = *input++; + re->buffer_r[re->ptr + re->taps] = re->buffer_r[re->ptr] = *input++; re->time -= PHASES; frames--; @@ -434,19 +438,52 @@ static void resampler_sinc_process(void *re_, struct resampler_data *data) static void resampler_sinc_free(void *re) { - aligned_free__(re); + rarch_sinc_resampler_t *resampler = (rarch_sinc_resampler_t*)re; + if (resampler) + aligned_free__(resampler->main_buffer); + free(resampler); } -static void *resampler_sinc_new(void) +static void *resampler_sinc_new(double bandwidth_mod) { - rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)aligned_alloc__(128, sizeof(*re)); + rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)calloc(1, sizeof(*re)); if (!re) return NULL; memset(re, 0, sizeof(*re)); re->taps = TAPS; - init_sinc_table(re, CUTOFF, &re->phase_table[0][0], 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP); + double cutoff = CUTOFF; + + // Downsampling, must lower cutoff, and extend number of taps accordingly to keep same stopband attenuation. + if (bandwidth_mod < 1.0) + { + cutoff *= bandwidth_mod; + re->taps = (unsigned)ceil(re->taps / bandwidth_mod); + } + + // Be SIMD-friendly. +#if (defined(__AVX__) && ENABLE_AVX) || defined(HAVE_NEON) + re->taps = (re->taps + 7) & ~7; +#else + re->taps = (re->taps + 3) & ~3; +#endif + + size_t phase_elems = (1 << PHASE_BITS) * re->taps; +#if SINC_COEFF_LERP + phase_elems *= 2; +#endif + size_t elems = phase_elems + 4 * re->taps; + + re->main_buffer = (sample_t*)aligned_alloc__(128, sizeof(sample_t) * elems); + if (!re->main_buffer) + goto error; + + re->phase_table = re->main_buffer; + re->buffer_l = re->main_buffer + phase_elems; + re->buffer_r = re->buffer_l + 2 * re->taps; + + init_sinc_table(re, cutoff, re->phase_table, 1 << PHASE_BITS, re->taps, SINC_COEFF_LERP); #if defined(__AVX__) && ENABLE_AVX RARCH_LOG("Sinc resampler [AVX]\n"); @@ -462,8 +499,11 @@ static void *resampler_sinc_new(void) #endif RARCH_LOG("SINC params (%u phase bits, %u taps).\n", PHASE_BITS, re->taps); - return re; + +error: + resampler_sinc_free(re); + return NULL; } const rarch_resampler_t sinc_resampler = { diff --git a/audio/test/Makefile b/audio/test/Makefile index d2634f948c..26089ba76c 100644 --- a/audio/test/Makefile +++ b/audio/test/Makefile @@ -16,10 +16,10 @@ LDFLAGS += -lm all: $(TESTS) -test-hermite: ../hermite.o ../utils.o main.o resampler-hermite.o +test-hermite: hermite.o ../utils.o main.o resampler-hermite.o $(CC) -o $@ $^ $(LDFLAGS) -test-snr-hermite: ../hermite.o ../utils.o snr.o resampler-hermite.o +test-snr-hermite: hermite.o ../utils.o snr.o resampler-hermite.o $(CC) -o $@ $^ $(LDFLAGS) resampler-sinc.o: ../resampler.c @@ -28,6 +28,9 @@ resampler-sinc.o: ../resampler.c resampler-hermite.o: ../resampler.c $(CC) -c -o $@ $< $(CFLAGS) +hermite.o: ../hermite.c + $(CC) -c -o $@ $< $(CFLAGS) + sinc-lowest.o: ../sinc.c $(CC) -c -o $@ $< $(CFLAGS) -DHAVE_SINC -DSINC_LOWEST_QUALITY diff --git a/audio/test/main.c b/audio/test/main.c index b51cdb4e04..531efd80cd 100644 --- a/audio/test/main.c +++ b/audio/test/main.c @@ -47,7 +47,7 @@ int main(int argc, char *argv[]) const rarch_resampler_t *resampler = NULL; void *re = NULL; - if (!rarch_resampler_realloc(&re, &resampler, NULL)) + if (!rarch_resampler_realloc(&re, &resampler, NULL, out_rate / in_rate)) { fprintf(stderr, "Failed to allocate resampler ...\n"); return 1; diff --git a/audio/test/sinc_test.m b/audio/test/sinc_test.m index 24584cc5aa..bbc0200696 100644 --- a/audio/test/sinc_test.m +++ b/audio/test/sinc_test.m @@ -2,14 +2,16 @@ close all; % 4-tap and 8-tap are Lanczos windowed, but include here for completeness. -sidelobes = [2 4 8 32 128]; -taps = sidelobes * 2; - phases = 256; -cutoffs = [0.65 0.75 0.825 0.90 0.95]; +bw = 0.375; +downsample = round(phases / bw); +cutoffs = bw * [0.65 0.75 0.825 0.90 0.95]; betas = [2.0 3.0 5.5 10.5 14.5]; -freqs = 0.05 : 0.05 : 0.99; +sidelobes = round([2 4 8 32 128] / bw); +taps = sidelobes * 2; + +freqs = 0.05 : 0.02 : 0.99; filters = length(taps); for i = 1 : filters @@ -21,17 +23,17 @@ for i = 1 : filters win = kaiser(filter_length, betas(i))'; filter = s .* win; - impulse_response_half = 0.5 * upfirdn(1, filter, phases, phases / 2); + impulse_response_half = 0.5 * upfirdn(1, filter, phases, downsample); figure('name', sprintf('Response SINC: %d taps', taps(i))); freqz(impulse_response_half); ylim([-200 0]); - signal = zeros(1, 4001); + signal = zeros(1, 8001); for freq = freqs - signal = signal + sin(pi * freq * (0 : 4000)); + signal = signal + sin(pi * freq * (0 : 8000)); end - resampled = upfirdn(signal, filter, phases, round(phases * 44100 / 48000)); + resampled = upfirdn(signal, filter, phases, downsample); figure('name', sprintf('Kaiser SINC: %d taps, w = %.f', taps(i), freq)); freqz(resampled .* kaiser(length(resampled), 40.0)'); ylim([-80 70]); diff --git a/audio/test/snr.c b/audio/test/snr.c index ab55461528..b3383d471f 100644 --- a/audio/test/snr.c +++ b/audio/test/snr.c @@ -23,6 +23,9 @@ #include #include +#undef min +#define min(a, b) ((a) < (b)) ? (a) : (b) + static void gen_signal(float *out, double omega, double bias_samples, size_t samples) { for (size_t i = 0; i < samples; i += 2) @@ -244,15 +247,9 @@ int main(int argc, char *argv[]) const unsigned fft_samples = 1024 * 128; unsigned out_rate = fft_samples / 2; - unsigned in_rate = out_rate / ratio; + unsigned in_rate = round(out_rate / ratio); ratio = (double)out_rate / in_rate; - if (ratio <= 1.0) - { - fprintf(stderr, "Ratio too low ...\n"); - return 1; - } - static const float freq_list[] = { 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.010, 0.015, 0.020, 0.025, 0.030, 0.035, 0.040, 0.045, 0.050, @@ -265,19 +262,18 @@ int main(int argc, char *argv[]) unsigned samples = in_rate * 4; float *input = calloc(sizeof(float), samples); - float *output = calloc(sizeof(float), (fft_samples + 1) * 2); + float *output = calloc(sizeof(float), (fft_samples + 16) * 2); complex double *butterfly_buf = calloc(sizeof(complex double), fft_samples / 2); assert(input); assert(output); void *re = NULL; const rarch_resampler_t *resampler = NULL; - if (!rarch_resampler_realloc(&re, &resampler, NULL)) + if (!rarch_resampler_realloc(&re, &resampler, NULL, ratio)) return 1; test_fft(); - printf("Omega,SNR,Gain\n"); for (unsigned i = 0; i < sizeof(freq_list) / sizeof(freq_list[0]); i++) { unsigned freq = freq_list[i] * in_rate; @@ -293,12 +289,13 @@ int main(int argc, char *argv[]) rarch_resampler_process(resampler, re, &data); - unsigned out_samples = data.output_frames * 2; - assert(out_samples >= fft_samples * 2); - // We generate 2 seconds worth of audio, however, only the last second is considered so phase has stabilized. struct snr_result res; - calculate_snr(&res, freq, in_rate / 2, output + fft_samples, butterfly_buf, fft_samples); + unsigned max_freq = min(in_rate, out_rate) / 2; + if (freq > max_freq) + continue; + + calculate_snr(&res, freq, max_freq, output + fft_samples - 2048, butterfly_buf, fft_samples); printf("SNR @ w = %5.3f : %6.2lf dB, Gain: %6.1lf dB\n", freq_list[i], res.snr, res.gain); diff --git a/driver.c b/driver.c index 48eb414e13..364ecba733 100644 --- a/driver.c +++ b/driver.c @@ -406,9 +406,13 @@ void init_audio(void) g_extern.audio_data.chunk_size = g_extern.audio_data.nonblock_chunk_size; } + g_extern.audio_data.orig_src_ratio = + g_extern.audio_data.src_ratio = + (double)g_settings.audio.out_rate / g_settings.audio.in_rate; + const char *resampler = *g_settings.audio.resampler ? g_settings.audio.resampler : NULL; if (!rarch_resampler_realloc(&g_extern.audio_data.resampler_data, &g_extern.audio_data.resampler, - resampler)) + resampler, g_extern.audio_data.orig_src_ratio)) { RARCH_ERR("Failed to initialize resampler \"%s\".\n", resampler ? resampler : "(default)"); g_extern.audio_active = false; @@ -421,10 +425,6 @@ void init_audio(void) rarch_assert(g_settings.audio.out_rate < g_settings.audio.in_rate * AUDIO_MAX_RATIO); rarch_assert(g_extern.audio_data.outsamples = (sample_t*)malloc(outsamples_max * sizeof(sample_t))); - g_extern.audio_data.orig_src_ratio = - g_extern.audio_data.src_ratio = - (double)g_settings.audio.out_rate / g_settings.audio.in_rate; - if (g_extern.audio_active && g_settings.audio.rate_control) { if (driver.audio->buffer_size && driver.audio->write_avail) diff --git a/record/ffemu.c b/record/ffemu.c index 4728c7ac44..a7ffffe4ff 100644 --- a/record/ffemu.c +++ b/record/ffemu.c @@ -281,7 +281,8 @@ static bool ffemu_init_audio(ffemu_t *handle) rarch_resampler_realloc(&audio->resampler_data, &audio->resampler, - *g_settings.audio.resampler ? g_settings.audio.resampler : NULL); + *g_settings.audio.resampler ? g_settings.audio.resampler : NULL, + audio->ratio); } else {