Update FreeSurround

This commit is contained in:
tygyh 2024-11-24 13:27:33 +01:00
parent 401d6e70f6
commit e95b62c36b
10 changed files with 1856 additions and 1605 deletions

View File

@ -1,5 +1,5 @@
if (NOT MSVC)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
endif()

View File

@ -15,14 +15,12 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#pragma once
#ifndef CHANNELMAPS_H
#define CHANNELMAPS_H
#include "FreeSurroundDecoder.h"
#include <map>
#include <vector>
const int grid_res = 21; // resolution of the lookup grid
constexpr int grid_res = 21; // resolution of the lookup grid
// channel allocation maps (per setup)
typedef std::vector<std::vector<float *>> alloc_lut;
@ -32,5 +30,3 @@ extern std::map<unsigned, std::vector<float>> chn_angle;
extern std::map<unsigned, std::vector<float>> chn_xsf;
extern std::map<unsigned, std::vector<float>> chn_ysf;
extern std::map<unsigned, std::vector<channel_id>> chn_id;
#endif

View File

@ -14,9 +14,8 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.
#pragma once
#ifndef FREESURROUND_DECODER_H
#define FREESURROUND_DECODER_H
#include "KissFFTR.h"
#include <complex>
#include <vector>
@ -52,12 +51,10 @@ typedef enum channel_id {
// of channels that are present. Here is a graphic of the cs_5point1 setup:
// http://en.wikipedia.org/wiki/File:5_1_channels_(surround_sound)_label.svg
typedef enum channel_setup {
cs_5point1 = ci_front_left | ci_front_center | ci_front_right | ci_back_left |
ci_back_right | ci_lfe,
cs_5point1 = ci_front_left | ci_front_center | ci_front_right | ci_back_left | ci_back_right | ci_lfe,
cs_7point1 = ci_front_left | ci_front_center | ci_front_right |
ci_side_center_left | ci_side_center_right | ci_back_left |
ci_back_right | ci_lfe
cs_7point1 = ci_front_left | ci_front_center | ci_front_right | ci_side_center_left | ci_side_center_right |
ci_back_left | ci_back_right | ci_lfe
} channel_setup;
// The FreeSurround decoder.
@ -68,15 +65,15 @@ public:
// @param setup The output channel setup -- determines the number of output
// channels and their place in the sound field.
// @param blocksize Granularity at which data is processed by the decode()
// function. Must be a power of two and should correspond to ca. 10ms worth
// function. Must be a power of two and should correspond to ca. 10 ms worth
// of single-channel samples (default is 4096 for 44.1 kHz data). Do not make
// it shorter or longer than 5ms to 20ms since the granularity at which
// it shorter or longer than 5 ms to 20 ms since the granularity at which
// locations are decoded changes with this.
DPL2FSDecoder();
~DPL2FSDecoder();
void Init(channel_setup setup = cs_5point1, unsigned int blocksize = 4096,
unsigned int samplerate = 48000);
void Init(channel_setup chsetup = cs_5point1, unsigned int blocksize = 4096, unsigned int sample_rate = 48000);
// Decode a chunk of stereo sound. The output is delayed by half of the
// blocksize. This function is the only one needed for straightforward
@ -86,7 +83,7 @@ public:
// @return A pointer to an internal buffer of exactly blocksize (multiplexed)
// multichannel samples. The actual number of values depends on the number of
// output channels in the chosen channel setup.
float *decode(float *input);
float *decode(const float *input);
// Flush the internal buffer.
void flush();
@ -94,22 +91,30 @@ public:
// set soundfield & rendering parameters
// for more information, see full FreeSurround source code
void set_circular_wrap(float v);
void set_shift(float v);
void set_depth(float v);
void set_focus(float v);
void set_center_image(float v);
void set_front_separation(float v);
void set_rear_separation(float v);
void set_low_cutoff(float v);
void set_high_cutoff(float v);
void set_bass_redirection(bool v);
// number of samples currently held in the buffer
unsigned int buffered();
[[nodiscard]] unsigned int buffered() const;
private:
// constants
const float pi = 3.141592654f;
const float epsilon = 0.000001f;
// number of samples per input/output block, number of output channels
@ -175,35 +180,45 @@ private:
// the signal to be constructed in every channel, in the frequency domain
// instantiate the decoder with a given channel setup and processing block
// size (in samples)
std::vector<std::vector<cplx>> signal;
std::vector<std::vector<cplx> > signal;
// helper functions
inline float sqr(double x);
inline double amplitude(const cplx &x);
inline double phase(const cplx &x);
inline cplx polar(double a, double p);
inline float min(double a, double b);
inline float max(double a, double b);
inline float clamp(double x);
inline float sign(double x);
static inline float sqr(double x);
static inline double amplitude(const cplx &x);
static inline double phase(const cplx &x);
static inline cplx polar(double a, double p);
static inline float min(double a, double b);
static inline float max(double a, double b);
static inline float clamp(double x);
static inline float sign(double x);
// get the distance of the soundfield edge, along a given angle
inline double edgedistance(double a);
static inline double edgedistance(double a);
// get the index (and fractional offset!) in a piecewise-linear channel
// allocation grid
int map_to_grid(double &x);
static int map_to_grid(double &x);
// decode a block of data and overlap-add it into outbuf
void buffered_decode(float *input);
void buffered_decode(const float *input);
// transform amp/phase difference space into x/y soundfield space
void transform_decode(double a, double p, double &x, double &y);
static std::tuple<double, double> transform_decode(double amp, double phase);
static float calculate_x(double amp, double phase);
static float calculate_y(double amp, double phase);
// apply a circular_wrap transformation to some position
void transform_circular_wrap(double &x, double &y, double refangle);
static void transform_circular_wrap(double &x, double &y, double refangle);
// apply a focus transformation to some position
void transform_focus(double &x, double &y, double focus);
static void transform_focus(double &x, double &y, double focus);
};
#endif

View File

@ -1,10 +1,13 @@
#ifndef KISS_FFT_H
#define KISS_FFT_H
#pragma once
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <numbers>
using std::abs;
using std::sqrt;
using std::numbers::pi;
#ifdef __cplusplus
extern "C" {
@ -51,8 +54,8 @@ extern "C" {
#endif
typedef struct {
kiss_fft_scalar r;
kiss_fft_scalar i;
kiss_fft_scalar r;
kiss_fft_scalar i;
} kiss_fft_cpx;
typedef struct kiss_fft_state *kiss_fft_cfg;
@ -60,9 +63,9 @@ typedef struct kiss_fft_state *kiss_fft_cfg;
/*
* kiss_fft_alloc
*
* Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
* Initialize an FFT (or IFFT) algorithm's cfg/state buffer.
*
* typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
* typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
*
* The return value from fft_alloc is a cfg buffer used internally
* by the fft routine or NULL.
@ -76,13 +79,12 @@ typedef struct kiss_fft_state *kiss_fft_cfg;
* then the function places the cfg in mem and the size used in *lenmem
* and returns mem.
*
* If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
* If lenmem is not NULL and (mem is NULL or *lenmem is not large enough),
* then the function returns NULL and places the minimum cfg
* buffer size in *lenmem.
* */
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem,
size_t *lenmem);
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem);
/*
* kiss_fft(cfg,in_out_buf)
@ -100,8 +102,7 @@ void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
A more generic version of the above function. It reads its input from every Nth
sample.
* */
void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx *fin,
kiss_fft_cpx *fout, int fin_stride);
void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int fin_stride);
/* If kiss_fft_alloc allocated a buffer, it is one contiguous
buffer and can be simply free()d when no longer needed*/
@ -121,11 +122,8 @@ void kiss_fft_cleanup(void);
int kiss_fft_next_fast_size(int n);
/* for real ffts, we need an even size */
#define kiss_fftr_next_fast_size_real(n) \
(kiss_fft_next_fast_size(((n) + 1) >> 1) << 1)
#define kiss_fftr_next_fast_size_real(n) (kiss_fft_next_fast_size(((n) + 1) >> 1) << 1)
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,5 +1,4 @@
#ifndef KISS_FTR_H
#define KISS_FTR_H
#pragma once
#include "KissFFT.h"
#ifdef __cplusplus
@ -17,23 +16,23 @@ extern "C" {
typedef struct kiss_fftr_state *kiss_fftr_cfg;
kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem,
size_t *lenmem);
kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem);
/*
nfft must be even
If you don't care to allocate space, use mem = lenmem = NULL
*/
void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata,
kiss_fft_cpx *freqdata);
void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata, kiss_fft_cpx *freqdata);
/*
input timedata has nfft scalar points
output freqdata has nfft/2+1 complex points
*/
void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata,
kiss_fft_scalar *timedata);
void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata);
/*
input freqdata has nfft/2+1 complex points
output timedata has nfft scalar points
@ -44,4 +43,3 @@ void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata,
#ifdef __cplusplus
}
#endif
#endif

View File

@ -40,11 +40,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
defines kiss_fft_scalar as either short or a float type
and defines
typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
#pragma once
#include "KissFFT.h"
#include <limits.h>
#define MAXFACTORS 32
/* e.g. an fft of length 128 has 4 factors
/* e.g., an fft of length 128 has 4 factors
as far as kissfft is concerned
4*4*4*2
*/
@ -79,13 +79,11 @@ struct kiss_fft_state {
#define SAMP_MIN -SAMP_MAX
#if defined(CHECK_OVERFLOW)
#define CHECK_OVERFLOW_OP(a, op, b) \
if ((SAMPPROD)(a)op(SAMPPROD)(b) > SAMP_MAX || \
(SAMPPROD)(a)op(SAMPPROD)(b) < SAMP_MIN) { \
fprintf(stderr, \
"WARNING:overflow @ " __FILE__ "(%d): (%d " #op " %d) = %ld\n", \
__LINE__, (a), (b), (SAMPPROD)(a)op(SAMPPROD)(b)); \
}
#define CHECK_OVERFLOW_OP(a, op, b) \
if ((SAMPPROD)(a)op(SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a)op(SAMPPROD)(b) < SAMP_MIN) { \
fprintf(stderr, "WARNING:overflow @ " __FILE__ "(%d): (%d " #op " %d) = %ld\n", __LINE__, (a), (b), \
(SAMPPROD)(a)op(SAMPPROD)(b)); \
}
#endif
#define smul(a, b) ((SAMPPROD)(a) * (b))
@ -93,75 +91,84 @@ struct kiss_fft_state {
#define S_MUL(a, b) sround(smul(a, b))
#define C_MUL(m, a, b) \
do { \
(m).r = sround(smul((a).r, (b).r) - smul((a).i, (b).i)); \
(m).i = sround(smul((a).r, (b).i) + smul((a).i, (b).r)); \
} while (0)
#define C_MUL(m, a, b) \
do { \
(m).r = sround(smul((a).r, (b).r) - smul((a).i, (b).i)); \
(m).i = sround(smul((a).r, (b).i) + smul((a).i, (b).r)); \
} \
while (0)
#define DIVSCALAR(x, k) (x) = sround(smul(x, SAMP_MAX / k))
#define C_FIXDIV(c, div) \
do { \
DIVSCALAR((c).r, div); \
DIVSCALAR((c).i, div); \
} while (0)
#define C_FIXDIV(c, div) \
do { \
DIVSCALAR((c).r, div); \
DIVSCALAR((c).i, div); \
} \
while (0)
#define C_MULBYSCALAR(c, s) \
do { \
(c).r = sround(smul((c).r, s)); \
(c).i = sround(smul((c).i, s)); \
} while (0)
#define C_MULBYSCALAR(c, s) \
do { \
(c).r = sround(smul((c).r, s)); \
(c).i = sround(smul((c).i, s)); \
} \
while (0)
#else /* not FIXED_POINT*/
#define S_MUL(a, b) ((a) * (b))
#define C_MUL(m, a, b) \
do { \
(m).r = (a).r * (b).r - (a).i * (b).i; \
(m).i = (a).r * (b).i + (a).i * (b).r; \
} while (0)
#define C_MUL(m, a, b) \
do { \
(m).r = (a).r * (b).r - (a).i * (b).i; \
(m).i = (a).r * (b).i + (a).i * (b).r; \
} \
while (0)
#define C_FIXDIV(c, div) /* NOOP */
#define C_MULBYSCALAR(c, s) \
do { \
(c).r *= (s); \
(c).i *= (s); \
} while (0)
#define C_MULBYSCALAR(c, s) \
do { \
(c).r *= (s); \
(c).i *= (s); \
} \
while (0)
#endif
#ifndef CHECK_OVERFLOW_OP
#define CHECK_OVERFLOW_OP(a, op, b) /* noop */
#endif
#define C_ADD(res, a, b) \
do { \
CHECK_OVERFLOW_OP((a).r, +, (b).r) \
CHECK_OVERFLOW_OP((a).i, +, (b).i) \
(res).r = (a).r + (b).r; \
(res).i = (a).i + (b).i; \
} while (0)
#define C_SUB(res, a, b) \
do { \
CHECK_OVERFLOW_OP((a).r, -, (b).r) \
CHECK_OVERFLOW_OP((a).i, -, (b).i) \
(res).r = (a).r - (b).r; \
(res).i = (a).i - (b).i; \
} while (0)
#define C_ADDTO(res, a) \
do { \
CHECK_OVERFLOW_OP((res).r, +, (a).r) \
CHECK_OVERFLOW_OP((res).i, +, (a).i) \
(res).r += (a).r; \
(res).i += (a).i; \
} while (0)
#define C_ADD(res, a, b) \
do { \
CHECK_OVERFLOW_OP((a).r, +, (b).r) \
CHECK_OVERFLOW_OP((a).i, +, (b).i) \
(res).r = (a).r + (b).r; \
(res).i = (a).i + (b).i; \
} \
while (0)
#define C_SUB(res, a, b) \
do { \
CHECK_OVERFLOW_OP((a).r, -, (b).r) \
CHECK_OVERFLOW_OP((a).i, -, (b).i) \
(res).r = (a).r - (b).r; \
(res).i = (a).i - (b).i; \
} \
while (0)
#define C_ADDTO(res, a) \
do { \
CHECK_OVERFLOW_OP((res).r, +, (a).r) \
CHECK_OVERFLOW_OP((res).i, +, (a).i) \
(res).r += (a).r; \
(res).i += (a).i; \
} \
while (0)
#define C_SUBFROM(res, a) \
do { \
CHECK_OVERFLOW_OP((res).r, -, (a).r) \
CHECK_OVERFLOW_OP((res).i, -, (a).i) \
(res).r -= (a).r; \
(res).i -= (a).i; \
} while (0)
#define C_SUBFROM(res, a) \
do { \
CHECK_OVERFLOW_OP((res).r, -, (a).r) \
CHECK_OVERFLOW_OP((res).i, -, (a).i) \
(res).r -= (a).r; \
(res).i -= (a).i; \
} \
while (0)
#ifdef FIXED_POINT
#define KISS_FFT_COS(phase) floor(.5 + SAMP_MAX * cos(phase))
@ -170,28 +177,28 @@ struct kiss_fft_state {
#elif defined(USE_SIMD)
#define KISS_FFT_COS(phase) _mm_set1_ps(cos(phase))
#define KISS_FFT_SIN(phase) _mm_set1_ps(sin(phase))
#define HALF_OF(x) ((x)*_mm_set1_ps(.5))
#define HALF_OF(x) ((x) * _mm_set1_ps(.5))
#else
#define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
#define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
#define HALF_OF(x) ((x)*.5)
#define HALF_OF(x) ((x) * .5)
#endif
#define kf_cexp(x, phase) \
do { \
(x)->r = KISS_FFT_COS(phase); \
(x)->i = KISS_FFT_SIN(phase); \
} while (0)
#define kf_cexp(x, phase) \
do { \
(x)->r = KISS_FFT_COS(phase); \
(x)->i = KISS_FFT_SIN(phase); \
} \
while (0)
/* a debugging function */
#define pcpx(c) \
fprintf(stderr, "%g + %gi\n", (double)((c)->r), (double)((c)->i))
/* Prints the complex value pointed to by c as "<re> + <im>i" on stderr.
   The definition must end on this line: a trailing line-continuation
   backslash would splice the next preprocessor directive into the macro. */
#define pcpx(c) fprintf(stderr, "%g + %gi\n", (double)((c)->r), (double)((c)->i))
#ifdef KISS_FFT_USE_ALLOCA
// define this to allow use of alloca instead of malloc for temporary buffers
// Temporary buffers are used in two case:
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an
// Define this to allow use of alloca instead of malloc for temporary buffers.
// Temporary buffers are used in two cases:
// 1. FFT sizes that have "bad" factors, i.e., not 2, 3 or 5
// 2. "In-place" FFTs. Notice the quotes, since kissfft does not really do an
// in-place transform.
#include <alloca.h>
#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,6 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "FreeSurround/FreeSurroundDecoder.h"
#include "FreeSurround/ChannelMaps.h"
#include <cmath>
@ -35,65 +34,65 @@ DPL2FSDecoder::~DPL2FSDecoder() {
kiss_fftr_free(inverse);
}
void DPL2FSDecoder::Init(channel_setup chsetup, unsigned int blsize,
unsigned int sample_rate) {
if (!initialized) {
setup = chsetup;
N = blsize;
samplerate = sample_rate;
void DPL2FSDecoder::Init(const channel_setup chsetup, const unsigned int blocksize, const unsigned int sample_rate) {
if (initialized)
return;
// Initialize the parameters
wnd = std::vector<double>(N);
inbuf = std::vector<float>(3 * N);
lt = std::vector<double>(N);
rt = std::vector<double>(N);
dst = std::vector<double>(N);
lf = std::vector<cplx>(N / 2 + 1);
rf = std::vector<cplx>(N / 2 + 1);
forward = kiss_fftr_alloc(N, 0, 0, 0);
inverse = kiss_fftr_alloc(N, 1, 0, 0);
C = static_cast<unsigned int>(chn_alloc[setup].size());
setup = chsetup;
N = blocksize;
samplerate = sample_rate;
// Allocate per-channel buffers
outbuf.resize((N + N / 2) * C);
signal.resize(C, std::vector<cplx>(N));
// Initialize the parameters
wnd = std::vector<double>(N);
inbuf = std::vector<float>(3 * N);
lt = std::vector<double>(N);
rt = std::vector<double>(N);
dst = std::vector<double>(N);
lf = std::vector<cplx>(N / 2 + 1);
rf = std::vector<cplx>(N / 2 + 1);
forward = kiss_fftr_alloc(N, 0, nullptr, nullptr);
inverse = kiss_fftr_alloc(N, 1, nullptr, nullptr);
C = static_cast<unsigned int>(chn_alloc[setup].size());
// Init the window function
for (unsigned int k = 0; k < N; k++)
wnd[k] = sqrt(0.5 * (1 - cos(2 * pi * k / N)) / N);
// Allocate per-channel buffers
outbuf.resize((N + N / 2) * C);
signal.resize(C, std::vector<cplx>(N));
// set default parameters
set_circular_wrap(90);
set_shift(0);
set_depth(1);
set_focus(0);
set_center_image(1);
set_front_separation(1);
set_rear_separation(1);
set_low_cutoff(40.0f / samplerate * 2);
set_high_cutoff(90.0f / samplerate * 2);
set_bass_redirection(false);
// Init the window function
for (unsigned int k = 0; k < N; k++)
wnd[k] = sqrt(0.5 * (1 - cos(2 * pi * k / N)) / N);
initialized = true;
}
// set default parameters
set_circular_wrap(90);
set_shift(0);
set_depth(1);
set_focus(0);
set_center_image(1);
set_front_separation(1);
set_rear_separation(1);
set_low_cutoff(40.0f / static_cast<float>(samplerate) * 2);
set_high_cutoff(90.0f / static_cast<float>(samplerate) * 2);
set_bass_redirection(false);
initialized = true;
}
// decode a stereo chunk, produces a multichannel chunk of the same size
// (lagged)
float *DPL2FSDecoder::decode(float *input) {
if (initialized) {
// append incoming data to the end of the input buffer
memcpy(&inbuf[N], &input[0], 8 * N);
// process first and second half, overlapped
buffered_decode(&inbuf[0]);
buffered_decode(&inbuf[N]);
// shift last half of the input to the beginning (for overlapping with a
// future block)
memcpy(&inbuf[0], &inbuf[2 * N], 4 * N);
buffer_empty = false;
return &outbuf[0];
}
return 0;
// Decode one chunk of interleaved stereo input.
// @param input N stereo frames (2 * N floats, L/R interleaved).
// @return Pointer to the start of the internal output buffer, or nullptr if
// Init() has not been called yet. The output lags the input because the two
// half-overlapping windows need data from the previous call.
float *DPL2FSDecoder::decode(const float *input) {
  if (!initialized)
    return nullptr;

  // Byte counts are derived from the sample type rather than hard-coded
  // (the former 8 * N / 4 * N silently assumed a 4-byte float).
  const size_t chunk_bytes = 2 * static_cast<size_t>(N) * sizeof(float);
  const size_t carry_bytes = static_cast<size_t>(N) * sizeof(float);

  // append incoming data to the end of the input buffer (inbuf[N .. 3N))
  memcpy(&inbuf[N], &input[0], chunk_bytes);
  // process first and second half, overlapped
  buffered_decode(&inbuf[0]);
  buffered_decode(&inbuf[N]);
  // shift last half of the input to the beginning (for overlapping with a
  // future block)
  memcpy(&inbuf[0], &inbuf[2 * N], carry_bytes);
  buffer_empty = false;
  return &outbuf[0];
}
// flush the internal buffers
@ -104,56 +103,52 @@ void DPL2FSDecoder::flush() {
}
// number of samples currently held in the buffer
unsigned int DPL2FSDecoder::buffered() { return buffer_empty ? 0 : N / 2; }
unsigned int DPL2FSDecoder::buffered() const { return buffer_empty ? 0 : N / 2; }
// set soundfield & rendering parameters
void DPL2FSDecoder::set_circular_wrap(float v) { circular_wrap = v; }
void DPL2FSDecoder::set_shift(float v) { shift = v; }
void DPL2FSDecoder::set_depth(float v) { depth = v; }
void DPL2FSDecoder::set_focus(float v) { focus = v; }
void DPL2FSDecoder::set_center_image(float v) { center_image = v; }
void DPL2FSDecoder::set_front_separation(float v) { front_separation = v; }
void DPL2FSDecoder::set_rear_separation(float v) { rear_separation = v; }
void DPL2FSDecoder::set_low_cutoff(float v) { lo_cut = v * (N / 2); }
void DPL2FSDecoder::set_high_cutoff(float v) { hi_cut = v * (N / 2); }
void DPL2FSDecoder::set_bass_redirection(bool v) { use_lfe = v; }
// Plain setters: each stores its argument in the like-named decoder member.
void DPL2FSDecoder::set_circular_wrap(const float v) { circular_wrap = v; }
void DPL2FSDecoder::set_shift(const float v) { shift = v; }
void DPL2FSDecoder::set_depth(const float v) { depth = v; }
void DPL2FSDecoder::set_focus(const float v) { focus = v; }
void DPL2FSDecoder::set_center_image(const float v) { center_image = v; }
void DPL2FSDecoder::set_front_separation(const float v) { front_separation = v; }
void DPL2FSDecoder::set_rear_separation(const float v) { rear_separation = v; }
// The two cutoffs are scaled by N / 2 before being stored, so lo_cut / hi_cut
// are expressed in the same units as the FFT bin index they are compared
// against in buffered_decode(). They depend on N, so N must be set first.
void DPL2FSDecoder::set_low_cutoff(const float v) { lo_cut = v * static_cast<float>(N / 2.0); }
void DPL2FSDecoder::set_high_cutoff(const float v) { hi_cut = v * static_cast<float>(N / 2.0); }
// Enables or disables routing of low frequencies to the last (LFE) channel.
void DPL2FSDecoder::set_bass_redirection(const bool v) { use_lfe = v; }
// helper functions
inline float DPL2FSDecoder::sqr(double x) { return static_cast<float>(x * x); }
inline double DPL2FSDecoder::amplitude(const cplx &x) {
return sqrt(sqr(x.real()) + sqr(x.imag()));
}
inline double DPL2FSDecoder::phase(const cplx &x) {
return atan2(x.imag(), x.real());
}
inline cplx DPL2FSDecoder::polar(double a, double p) {
return cplx(a * cos(p), a * sin(p));
}
inline float DPL2FSDecoder::min(double a, double b) {
return static_cast<float>(a < b ? a : b);
}
inline float DPL2FSDecoder::max(double a, double b) {
return static_cast<float>(a > b ? a : b);
}
inline float DPL2FSDecoder::clamp(double x) { return max(-1, min(1, x)); }
inline float DPL2FSDecoder::sign(double x) {
return static_cast<float>(x < 0 ? -1 : (x > 0 ? 1 : 0));
}
// Square of x; computed in double, then narrowed to float.
inline float DPL2FSDecoder::sqr(const double x) { return static_cast<float>(x * x); }
// Magnitude of a complex value. Each squared component passes through sqr(),
// so the intermediate squares are rounded to float precision.
inline double DPL2FSDecoder::amplitude(const cplx &x) { return sqrt(sqr(x.real()) + sqr(x.imag())); }
// Argument (angle) of a complex value, as returned by atan2(imag, real).
inline double DPL2FSDecoder::phase(const cplx &x) { return atan2(x.imag(), x.real()); }
// Builds a complex value from magnitude a and angle p (radians).
inline cplx DPL2FSDecoder::polar(const double a, const double p) { return cplx(a * cos(p), a * sin(p)); }
// Smaller of a and b, narrowed to float (returns b when the comparison is false).
inline float DPL2FSDecoder::min(const double a, const double b) { return static_cast<float>(a < b ? a : b); }
// Larger of a and b, narrowed to float (returns b when the comparison is false).
inline float DPL2FSDecoder::max(const double a, const double b) { return static_cast<float>(a > b ? a : b); }
// Limits x to the closed interval [-1, 1].
inline float DPL2FSDecoder::clamp(const double x) { return max(-1, min(1, x)); }
// Returns -1 for negative x, 1 for positive x and 0 otherwise, as a float.
inline float DPL2FSDecoder::sign(const double x) { return static_cast<float>(x < 0 ? -1 : x > 0 ? 1 : 0); }
// get the distance of the soundfield edge, along a given angle
inline double DPL2FSDecoder::edgedistance(double a) {
// Returns the smaller of sqrt(1 + tan(a)^2) and sqrt(1 + (1 / tan(a))^2) for
// the angle a (radians). The squares and the minimum go through the float
// helpers sqr() and min(), so the result carries float precision.
inline double DPL2FSDecoder::edgedistance(const double a) {
return min(sqrt(1 + sqr(tan(a))), sqrt(1 + sqr(1 / tan(a))));
}
// get the index (and fractional offset!) in a piecewise-linear channel
// allocation grid
int DPL2FSDecoder::map_to_grid(double &x) {
double gp = ((x + 1) * 0.5) * (grid_res - 1),
i = min(grid_res - 2, floor(gp));
const double gp = (x + 1) * 0.5 * (grid_res - 1), i = min(grid_res - 2, floor(gp));
x = gp - i;
return static_cast<int>(i);
}
// decode a block of data and overlap-add it into outbuf
void DPL2FSDecoder::buffered_decode(float *input) {
void DPL2FSDecoder::buffered_decode(const float *input) {
// demultiplex and apply window function
for (unsigned int k = 0; k < N; k++) {
lt[k] = wnd[k] * input[k * 2 + 0];
@ -161,24 +156,21 @@ void DPL2FSDecoder::buffered_decode(float *input) {
}
// map into spectral domain
kiss_fftr(forward, &lt[0], (kiss_fft_cpx *)&lf[0]);
kiss_fftr(forward, &rt[0], (kiss_fft_cpx *)&rf[0]);
kiss_fftr(forward, &lt[0], reinterpret_cast<kiss_fft_cpx *>(&lf[0]));
kiss_fftr(forward, &rt[0], reinterpret_cast<kiss_fft_cpx *>(&rf[0]));
// compute multichannel output signal in the spectral domain
for (unsigned int f = 1; f < N / 2; f++) {
// get Lt/Rt amplitudes & phases
double ampL = amplitude(lf[f]), ampR = amplitude(rf[f]);
double phaseL = phase(lf[f]), phaseR = phase(rf[f]);
// calculate the amplitude & phase differences
double ampDiff =
clamp((ampL + ampR < epsilon) ? 0 : (ampR - ampL) / (ampR + ampL));
const double ampL = amplitude(lf[f]), ampR = amplitude(rf[f]), phaseL = phase(lf[f]), phaseR = phase(rf[f]);
// calculate the amplitude and phase differences
const double ampDiff = clamp(ampL + ampR < epsilon ? 0 : (ampR - ampL) / (ampR + ampL));
double phaseDiff = abs(phaseL - phaseR);
if (phaseDiff > pi)
phaseDiff = 2 * pi - phaseDiff;
// decode into x/y soundfield position
double x, y;
transform_decode(ampDiff, phaseDiff, x, y);
auto [x, y] = transform_decode(ampDiff, phaseDiff);
// add wrap control
transform_circular_wrap(x, y, circular_wrap);
// add shift control
@ -188,42 +180,42 @@ void DPL2FSDecoder::buffered_decode(float *input) {
// add focus control
transform_focus(x, y, focus);
// add crossfeed control
x = clamp(x *
(front_separation * (1 + y) / 2 + rear_separation * (1 - y) / 2));
x = clamp(x * (front_separation * (1 + y) / 2 + rear_separation * (1 - y) / 2));
// get total signal amplitude
double amp_total = sqrt(ampL * ampL + ampR * ampR);
const double amp_total = sqrt(ampL * ampL + ampR * ampR);
// and total L/C/R signal phases
double phase_of[] = {
phaseL, atan2(lf[f].imag() + rf[f].imag(), lf[f].real() + rf[f].real()),
phaseR};
const double phase_of[] = {phaseL, atan2(lf[f].imag() + rf[f].imag(), lf[f].real() + rf[f].real()), phaseR};
// compute 2d channel map indexes p/q and update x/y to fractional offsets
// in the map grid
int p = map_to_grid(x), q = map_to_grid(y);
const int p = map_to_grid(x), q = map_to_grid(y);
// map position to channel volumes
for (unsigned int c = 0; c < C - 1; c++) {
// look up channel map at respective position (with bilinear
// look up the channel map at respective position (with bilinear
// interpolation) and build the
// signal
std::vector<float *> &a = chn_alloc[setup][c];
signal[c][f] = polar(
amp_total * ((1 - x) * (1 - y) * a[q][p] + x * (1 - y) * a[q][p + 1] +
(1 - x) * y * a[q + 1][p] + x * y * a[q + 1][p + 1]),
phase_of[1 + static_cast<int>(sign(chn_xsf[setup][c]))]);
amp_total * ((1 - x) * (1 - y) * a[q][p] +
x * (1 - y) * a[q][p + 1] +
(1 - x) * y * a[q + 1][p] +
x * y * a[q + 1][p + 1]),
phase_of[1 + static_cast<int>(sign(chn_xsf[setup][c]))]);
}
// optionally redirect bass
if (use_lfe && f < hi_cut) {
// level of LFE channel according to normalized frequency
double lfe_level =
f < lo_cut ? 1
: 0.5 * (1 + cos(pi * (f - lo_cut) / (hi_cut - lo_cut)));
// assign LFE channel
signal[C - 1][f] = lfe_level * polar(amp_total, phase_of[1]);
// subtract the signal from the other channels
for (unsigned int c = 0; c < C - 1; c++)
signal[c][f] *= (1 - lfe_level);
}
if (!use_lfe)
continue;
const auto w = static_cast<float>(f);
if (w >= hi_cut)
continue;
// level of LFE channel according to normalized frequency
double lfe_level = w < lo_cut ? 1 : 0.5 * (1 + cos(pi * (w - lo_cut) / (hi_cut - lo_cut)));
// assign LFE channel
signal[C - 1][f] = lfe_level * polar(amp_total, phase_of[1]);
// subtract the signal from the other channels
for (unsigned int c = 0; c < C - 1; c++)
signal[c][f] *= 1 - lfe_level;
}
// shift the last 2/3 to the first 2/3 of the output buffer
@ -233,7 +225,7 @@ void DPL2FSDecoder::buffered_decode(float *input) {
// backtransform each channel and overlap-add
for (unsigned int c = 0; c < C; c++) {
// back-transform into time domain
kiss_fftri(inverse, (kiss_fft_cpx *)&signal[c][0], &dst[0]);
kiss_fftri(inverse, reinterpret_cast<kiss_fft_cpx *>(&signal[c][0]), &dst[0]);
// add the result to the last 2/3 of the output buffer, windowed (and
// remultiplex)
for (unsigned int k = 0; k < N; k++)
@ -242,39 +234,37 @@ void DPL2FSDecoder::buffered_decode(float *input) {
}
// transform amp/phase difference space into x/y soundfield space
void DPL2FSDecoder::transform_decode(double a, double p, double &x, double &y) {
x = clamp(1.0047 * a + 0.46804 * a * p * p * p - 0.2042 * a * p * p * p * p +
0.0080586 * a * p * p * p * p * p * p * p -
0.0001526 * a * p * p * p * p * p * p * p * p * p * p -
0.073512 * a * a * a * p - 0.2499 * a * a * a * p * p * p * p +
0.016932 * a * a * a * p * p * p * p * p * p * p -
0.00027707 * a * a * a * p * p * p * p * p * p * p * p * p * p +
0.048105 * a * a * a * a * a * p * p * p * p * p * p * p -
0.0065947 * a * a * a * a * a * p * p * p * p * p * p * p * p * p *
p +
0.0016006 * a * a * a * a * a * p * p * p * p * p * p * p * p * p *
p * p -
0.0071132 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
p * p +
0.0022336 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
p * p * p * p -
0.0004804 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
p * p * p * p * p);
y = clamp(
0.98592 - 0.62237 * p + 0.077875 * p * p - 0.0026929 * p * p * p * p * p +
0.4971 * a * a * p - 0.00032124 * a * a * p * p * p * p * p * p +
9.2491e-006 * a * a * a * a * p * p * p * p * p * p * p * p * p * p +
0.051549 * a * a * a * a * a * a * a * a +
1.0727e-014 * a * a * a * a * a * a * a * a * a * a);
// Maps an amplitude/phase difference pair to an (x, y) soundfield position.
// Both coordinates come back already clamped by calculate_x / calculate_y.
std::tuple<double, double> DPL2FSDecoder::transform_decode(const double amp, const double phase) {
  const float x_pos = calculate_x(amp, phase);
  const float y_pos = calculate_y(amp, phase);
  return std::make_tuple(x_pos, y_pos);
}
// Evaluates the x (left/right) coordinate of the soundfield position from the
// amplitude difference `amp` and phase difference `phase`, using the fitted
// FreeSurround polynomial. The result is clamped to [-1, 1].
//
// Terms of the polynomial (coefficient * amp^i * phase^j):
//   amp^1: +1.0047 (p^0)  +0.46804 p^3   -0.2042 p^4    +0.0080586 p^7  -0.0001526 p^10
//   amp^3: -0.073512 p^1  -0.2499 p^4    +0.016932 p^7  -0.00027707 p^10
//   amp^5: +0.048105 p^7  -0.0065947 p^10  +0.0016006 p^11
//   amp^7: -0.0071132 p^9 +0.0022336 p^11  -0.0004804 p^12
float DPL2FSDecoder::calculate_x(const double amp, const double phase) {
  // Powers are built by repeated multiplication; pow() is unnecessary for
  // small integer exponents.
  const double p2 = phase * phase;
  const double p3 = p2 * phase;
  const double p4 = p2 * p2;
  const double p7 = p4 * p3;
  const double p9 = p7 * p2;
  const double p10 = p9 * phase;
  const double p11 = p10 * phase;
  const double p12 = p11 * phase;

  const double a2 = amp * amp;
  const double a3 = a2 * amp;
  const double a5 = a3 * a2;
  const double a7 = a5 * a2;

  return clamp(amp * (1.0047 + 0.46804 * p3 - 0.2042 * p4 + 0.0080586 * p7 - 0.0001526 * p10) +
               a3 * (-0.073512 * phase - 0.2499 * p4 + 0.016932 * p7 - 0.00027707 * p10) +
               a5 * (0.048105 * p7 - 0.0065947 * p10 + 0.0016006 * p11) +
               a7 * (-0.0071132 * p9 + 0.0022336 * p11 - 0.0004804 * p12));
}
// Evaluates the y (front/back) coordinate of the soundfield position from the
// amplitude difference `amp` and phase difference `phase`, using the fitted
// FreeSurround polynomial. The result is clamped to [-1, 1].
//
// Terms of the polynomial (coefficient * amp^i * phase^j):
//   +0.98592  -0.62237 p  +0.077875 p^2  -0.0026929 p^5
//   +0.4971 a^2 p  -0.00032124 a^2 p^6  +9.2491e-6 a^4 p^10
//   +0.051549 a^8  +1.0727e-14 a^10
float DPL2FSDecoder::calculate_y(const double amp, const double phase) {
  // Powers are built by repeated multiplication; pow() is unnecessary for
  // small integer exponents.
  const double p2 = phase * phase;
  const double p5 = p2 * p2 * phase;
  const double p6 = p5 * phase;
  const double p10 = p5 * p5;

  const double a2 = amp * amp;
  const double a4 = a2 * a2;
  const double a8 = a4 * a4;
  const double a10 = a8 * a2;

  return clamp(0.98592 - 0.62237 * phase + 0.077875 * p2 - 0.0026929 * p5 + 0.4971 * a2 * phase -
               0.00032124 * a2 * p6 + 9.2491e-006 * a4 * p10 + 0.051549 * a8 + 1.0727e-014 * a10);
}
// apply a circular_wrap transformation to some position
void DPL2FSDecoder::transform_circular_wrap(double &x, double &y,
double refangle) {
void DPL2FSDecoder::transform_circular_wrap(double &x, double &y, double refangle) {
if (refangle == 90)
return;
refangle = refangle * pi / 180;
double baseangle = 90 * pi / 180;
constexpr double baseangle = pi / 2;
// translate into edge-normalized polar coordinates
double ang = atan2(x, y), len = sqrt(x * x + y * y);
len = len / edgedistance(ang);
@ -293,15 +283,15 @@ void DPL2FSDecoder::transform_circular_wrap(double &x, double &y,
}
// apply a focus transformation to some position
void DPL2FSDecoder::transform_focus(double &x, double &y, double focus) {
void DPL2FSDecoder::transform_focus(double &x, double &y, const double focus) {
if (focus == 0)
return;
const double ang = atan2(x, y);
// translate into edge-normalized polar coordinates
double ang = atan2(x, y),
len = clamp(sqrt(x * x + y * y) / edgedistance(ang));
double len = clamp(sqrt(x * x + y * y) / edgedistance(ang));
// apply focus
len = focus > 0 ? 1 - pow(1 - len, 1 + focus * 20) : pow(len, 1 - focus * 20);
// back-transform into euclidian soundfield position
// back-transform into Euclidean soundfield position
len = len * edgedistance(ang);
x = clamp(sin(ang) * len);
y = clamp(cos(ang) * len);

View File

@ -37,19 +37,19 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "FreeSurround/_KissFFTGuts.h"
/* The guts header contains all the multiplication and addition macros that are
defined for
fixed or floating point complex numbers. It also delares the kf_ internal
functions.
#include <random>
#include <vector>
/* The guts header contains all the multiplication and addition macros
that are defined for fixed or floating point complex numbers.
It also declares the kf_ internal functions.
*/
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
const kiss_fft_cfg st, int m) {
kiss_fft_cpx *Fout2;
kiss_fft_cpx *tw1 = st->twiddles;
kiss_fft_cpx t;
Fout2 = Fout + m;
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) {
const kiss_fft_cpx *tw1 = st->twiddles;
kiss_fft_cpx *Fout2 = Fout + m;
do {
kiss_fft_cpx t;
C_FIXDIV(*Fout, 2);
C_FIXDIV(*Fout2, 2);
@ -62,17 +62,15 @@ static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
} while (--m);
}
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
const kiss_fft_cfg st, const size_t m) {
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) {
kiss_fft_cpx *tw1, *tw2, *tw3;
kiss_fft_cpx scratch[6];
size_t k = m;
const size_t m2 = 2 * m;
const size_t m3 = 3 * m;
const size_t m2 = 2 * m, m3 = 3 * m;
tw3 = tw2 = tw1 = st->twiddles;
do {
kiss_fft_cpx scratch[6];
C_FIXDIV(*Fout, 4);
C_FIXDIV(Fout[m], 4);
C_FIXDIV(Fout[m2], 4);
@ -107,18 +105,16 @@ static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
} while (--k);
}
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
const kiss_fft_cfg st, size_t m) {
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) {
size_t k = m;
const size_t m2 = 2 * m;
kiss_fft_cpx *tw1, *tw2;
kiss_fft_cpx scratch[5];
kiss_fft_cpx epi3;
epi3 = st->twiddles[fstride * m];
const auto [r, i] = st->twiddles[fstride * m];
tw1 = tw2 = st->twiddles;
do {
kiss_fft_cpx scratch[5];
C_FIXDIV(*Fout, 3);
C_FIXDIV(Fout[m], 3);
C_FIXDIV(Fout[m2], 3);
@ -134,7 +130,7 @@ static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
C_MULBYSCALAR(scratch[0], epi3.i);
C_MULBYSCALAR(scratch[0], i);
C_ADDTO(*Fout, scratch[3]);
@ -148,25 +144,14 @@ static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
} while (--k);
}
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
const kiss_fft_cfg st, int m) {
kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
int u;
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const int m) {
kiss_fft_cpx *Fout0 = Fout, *Fout1 = Fout0 + m, *Fout2 = Fout0 + 2 * m, *Fout3 = Fout0 + 3 * m,
*Fout4 = Fout0 + 4 * m;
kiss_fft_cpx scratch[13];
kiss_fft_cpx *twiddles = st->twiddles;
kiss_fft_cpx *tw;
kiss_fft_cpx ya, yb;
ya = twiddles[fstride * m];
yb = twiddles[fstride * 2 * m];
const kiss_fft_cpx *twiddles = st->twiddles, *tw = st->twiddles, ya = twiddles[fstride * m],
yb = twiddles[fstride * 2 * m];
Fout0 = Fout;
Fout1 = Fout0 + m;
Fout2 = Fout0 + 2 * m;
Fout3 = Fout0 + 3 * m;
Fout4 = Fout0 + 4 * m;
tw = st->twiddles;
for (u = 0; u < m; ++u) {
for (int u = 0; u < m; ++u) {
C_FIXDIV(*Fout0, 5);
C_FIXDIV(*Fout1, 5);
C_FIXDIV(*Fout2, 5);
@ -187,10 +172,8 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r =
scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
scratch[5].i =
scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
@ -198,10 +181,8 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
C_SUB(*Fout1, scratch[5], scratch[6]);
C_ADD(*Fout4, scratch[5], scratch[6]);
scratch[11].r =
scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
scratch[11].i =
scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
@ -217,43 +198,35 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT (generic radix p).
 *
 * Fout    : in/out buffer; the p elements Fout[u], Fout[u + m], ... are combined for every u in [0, m)
 * fstride : stride into the twiddle table for this stage
 * st      : FFT state supplying the twiddle table (st->twiddles) and the transform length (st->nfft)
 * m       : number of butterflies in this stage (distance between the p inputs of one butterfly)
 * p       : radix of this stage; also the number of scratch elements allocated
 */
static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const int m, const int p) {
  const kiss_fft_cpx *twiddles = st->twiddles;
  const int Norig = st->nfft;

  // temporary copy of the p inputs of the butterfly currently being processed
  const auto scratch = static_cast<kiss_fft_cpx *>(KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p));

  for (int u = 0; u < m; ++u) {
    // gather the p inputs (spaced m apart) before any of them is overwritten
    for (int q1 = 0, i = u; q1 < p; ++q1, i += m) {
      scratch[q1] = Fout[i];
      C_FIXDIV(scratch[q1], p);
    }

    // every output is scratch[0] plus the twiddled sum of the remaining inputs
    for (int q1 = 0, j = u; q1 < p; ++q1, j += m) {
      int twidx = 0;
      Fout[j] = scratch[0];
      for (int q = 1; q < p; ++q) {
        kiss_fft_cpx t;
        // advance the twiddle index and wrap it with one conditional subtraction
        // (assumes fstride * j < Norig so a single subtraction suffices -- TODO confirm)
        twidx += static_cast<int>(fstride) * j;
        if (twidx >= Norig)
          twidx -= Norig;
        C_MUL(t, scratch[q], twiddles[twidx]);
        C_ADDTO(Fout[j], t);
      }
    }
  }
  KISS_FFT_TMP_FREE(scratch);
}
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
const size_t fstride, int in_stride, int *factors,
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors,
const kiss_fft_cfg st) {
kiss_fft_cpx *Fout_beg = Fout;
const int p = *factors++; /* the radix */
@ -269,26 +242,25 @@ static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
// execute the p different work units in different threads
#pragma omp parallel for
for (k = 0; k < p; ++k)
kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride,
factors, st);
kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, factors, st);
// all threads have joined by this point
switch (p) {
case 2:
kf_bfly2(Fout, fstride, st, m);
break;
case 3:
kf_bfly3(Fout, fstride, st, m);
break;
case 4:
kf_bfly4(Fout, fstride, st, m);
break;
case 5:
kf_bfly5(Fout, fstride, st, m);
break;
default:
kf_bfly_generic(Fout, fstride, st, m, p);
break;
case 2:
kf_bfly2(Fout, fstride, st, m);
break;
case 3:
kf_bfly3(Fout, fstride, st, m);
break;
case 4:
kf_bfly4(Fout, fstride, st, m);
break;
case 5:
kf_bfly5(Fout, fstride, st, m);
break;
default:
kf_bfly_generic(Fout, fstride, st, m, p);
break;
}
return;
}
@ -314,54 +286,89 @@ static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
// recombine the p smaller DFTs
switch (p) {
case 2:
kf_bfly2(Fout, fstride, st, m);
break;
case 3:
kf_bfly3(Fout, fstride, st, m);
break;
case 4:
kf_bfly4(Fout, fstride, st, m);
break;
case 5:
kf_bfly5(Fout, fstride, st, m);
break;
default:
kf_bfly_generic(Fout, fstride, st, m, p);
break;
case 2:
kf_bfly2(Fout, fstride, st, m);
break;
case 3:
kf_bfly3(Fout, fstride, st, m);
break;
case 4:
kf_bfly4(Fout, fstride, st, m);
break;
case 5:
kf_bfly5(Fout, fstride, st, m);
break;
default:
kf_bfly_generic(Fout, fstride, st, m, p);
break;
}
}
/**
 * @brief Pollard's Rho search for a non-trivial factor of n.
 *
 * Runs Floyd ("tortoise and hare") cycle detection on x -> (x * x + c) mod n and takes
 * gcd(|x - y|, n) after every step. The start value and the constants c are fixed, so the
 * result is reproducible. Each attempt terminates because the sequence must eventually
 * cycle, at which point x == y and the gcd equals n.
 *
 * kf_factor() does not rely on this helper: a failed search must never leave an FFT
 * factor table incomplete.
 *
 * @param n The integer to factor.
 * @return 2 for even n >= 4; otherwise a divisor d with 1 < d < n if one was found;
 *         0 if n < 4, if n is prime, or if none of the attempts succeeded.
 */
int pollards_rho(const int n) {
  if (n < 4)
    return 0; // nothing below 4 has a non-trivial factor
  if (n % 2 == 0)
    return 2;

  const int64_t mod = n;
  constexpr int64_t max_attempts = 32; // bounds the work spent on primes, where every attempt fails

  for (int64_t c = 1; c <= max_attempts && c < mod; ++c) {
    int64_t x = 2, y = 2, d = 1;
    while (d == 1) {
      // x, y < mod < 2^31, so the squares fit comfortably in 64 bits
      x = (x * x + c) % mod;
      y = (y * y + c) % mod;
      y = (y * y + c) % mod;

      // d = gcd(|x - y|, mod) by Euclid's algorithm
      int64_t a = x > y ? x - y : y - x;
      int64_t b = mod;
      while (b != 0) {
        const int64_t r = a % b;
        a = b;
        b = r;
      }
      d = a;
    }
    if (d != mod)
      return static_cast<int>(d);
  }
  return 0;
}

/**
 * @brief Factorizes n into the radix sequence used by the FFT stages.
 *
 * facbuf is populated by p1,m1,p2,m2, ...
 * where
 *   p[i] * m[i] = m[i-1]
 *   m0 = n
 *
 * Powers of 4 are factored out first, then 2, then the odd candidates 3, 5, 7, ... by
 * trial division (presumably so that the dedicated radix-4 butterfly is preferred over
 * two radix-2 stages -- confirm against kf_work). The factorization is deterministic
 * and always complete: once the trial divisor exceeds sqrt(n), the remaining n is prime
 * and is recorded as the last radix. For n == 1 the single pair (1, 1) is written.
 *
 * @param n      The transform length to factor; expected to be >= 1.
 * @param facbuf Output array receiving the (p, m) pairs; the caller must provide room
 *               for two ints per factor.
 */
void kf_factor(int n, int *facbuf) {
  int p = 4;

  do {
    while (n % p != 0) {
      switch (p) {
      case 4:
        p = 2;
        break;
      case 2:
        p = 3;
        break;
      default:
        p += 2;
        break;
      }
      // no divisor up to sqrt(n) is left, so the remainder itself is the last (prime) factor
      if (static_cast<long long>(p) * p > n)
        p = n;
    }
    n /= p;
    *facbuf++ = p;
    *facbuf++ = n;
  } while (n > 1);
}
/*
@ -372,73 +379,87 @@ static void kf_factor(int n, int *facbuf) {
* such,
* It can be freed with free(), rather than a kiss_fft-specific function.
* */
/*
 * Create (or initialize in caller-provided memory) the state for an nfft-point complex FFT.
 *
 * nfft        : transform length; must be positive, otherwise nullptr is returned
 * inverse_fft : non-zero to build the twiddles for the inverse transform
 * mem, lenmem : when lenmem is nullptr the state is allocated here; otherwise *lenmem is
 *               overwritten with the number of bytes required, and mem is used only if it
 *               is non-null and *lenmem was at least that large on entry
 *
 * Returns the initialized state, or nullptr when nfft is not positive or when no usable
 * buffer was supplied (the latter still reports the required size through *lenmem).
 *
 * NOTE(review): the self-allocated path uses new char[], so that buffer has to be released
 * with delete[] on a char pointer rather than free() -- confirm against the callers.
 */
kiss_fft_cfg kiss_fft_alloc(const int nfft, const int inverse_fft, void *mem, size_t *lenmem) {
  // (nfft - 1) would wrap when converted to size_t and under-size the buffer
  if (nfft <= 0)
    return nullptr;

  kiss_fft_cfg st = nullptr;
  // the state struct plus the twiddle factors beyond the first one
  const size_t memneeded = sizeof(struct kiss_fft_state) + sizeof(kiss_fft_cpx) * (static_cast<size_t>(nfft) - 1);

  if (lenmem == nullptr) {
    st = reinterpret_cast<kiss_fft_cfg>(new char[memneeded]);
  } else {
    if (mem != nullptr && *lenmem >= memneeded)
      st = static_cast<kiss_fft_cfg>(mem);
    *lenmem = memneeded;
  }
  if (!st) {
    return st;
  }

  st->nfft = nfft;
  st->inverse = inverse_fft;

  // twiddle i is exp(-2*pi*j*i/nfft); the sign is flipped for the inverse transform
  for (int i = 0; i < nfft; ++i) {
    double phase = -2 * pi * i / nfft;
    if (st->inverse)
      phase *= -1;
    kf_cexp(st->twiddles + i, phase);
  }

  kf_factor(nfft, st->factors);
  return st;
}
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin,
kiss_fft_cpx *fout, int in_stride) {
if (fin == fout) {
// NOTE: this is not really an in-place FFT algorithm.
// It just performs an out-of-place FFT into a temp buffer
kiss_fft_cpx *tmpbuf =
(kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
KISS_FFT_TMP_FREE(tmpbuf);
} else {
kf_work(fout, fin, 1, in_stride, st->factors, st);
void kiss_fft_stride(const kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, const int fin_stride) {
if (fin != fout) {
kf_work(fout, fin, 1, fin_stride, cfg->factors, cfg);
return;
}
// NOTE: this is not really an in-place FFT algorithm.
// It just performs an out-of-place FFT into a temp buffer
auto *tmpbuf = static_cast<kiss_fft_cpx *>(KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * cfg->nfft));
kf_work(tmpbuf, fin, 1, fin_stride, cfg->factors, cfg);
memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * cfg->nfft);
KISS_FFT_TMP_FREE(tmpbuf);
}
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
void kiss_fft(const kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
kiss_fft_stride(cfg, fin, fout, 1);
}
/* Intentionally a no-op: there is nothing left for this function to release. */
void kiss_fft_cleanup(void) {}
/**
 * Finds the smallest integer greater than or equal to n whose only prime
 * factors are 2, 3 and 5 (a 5-smooth number). Such sizes are handled entirely
 * by the dedicated radix-2/3/4/5 butterflies.
 *
 * Each candidate is tested by dividing out every factor of 2, 3 and 5 and
 * checking that nothing but 1 remains, so no memory is allocated.
 *
 * @param n The starting integer to search from. It must not exceed the largest
 *          5-smooth value representable as int, otherwise the search overflows.
 * @return The smallest 5-smooth integer >= n; 1 for every n <= 1, because
 *         1 = 2^0 * 3^0 * 5^0 is the smallest such number.
 */
int kiss_fft_next_fast_size(const int n) {
  if (n <= 1)
    return 1;

  constexpr int primes[] = {2, 3, 5};
  for (int candidate = n;; ++candidate) {
    int rest = candidate;
    for (const int prime : primes) {
      while (rest % prime == 0)
        rest /= prime;
    }
    if (rest == 1)
      return candidate; /* candidate is completely factorable by twos, threes, and fives */
  }
}

View File

@ -38,6 +38,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "FreeSurround/KissFFTR.h"
#include "FreeSurround/_KissFFTGuts.h"
#include <cstdio>
struct kiss_fftr_state {
kiss_fft_cfg substate;
@ -48,40 +49,36 @@ struct kiss_fftr_state {
#endif
};
kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem,
size_t *lenmem) {
int i;
kiss_fftr_cfg st = NULL;
kiss_fftr_cfg kiss_fftr_alloc(int nfft, const int inverse_fft, void *mem, size_t *lenmem) {
kiss_fftr_cfg st = nullptr;
size_t subsize = 65536 * 4, memneeded = 0;
if (nfft & 1) {
fprintf(stderr, "Real FFT optimization must be even.\n");
return NULL;
return nullptr;
}
nfft >>= 1;
kiss_fft_alloc(nfft, inverse_fft, NULL, &subsize);
memneeded = sizeof(struct kiss_fftr_state) + subsize +
sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
kiss_fft_alloc(nfft, inverse_fft, nullptr, &subsize);
memneeded = sizeof(kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) {
st = (kiss_fftr_cfg)malloc(memneeded);
if (lenmem == nullptr) {
st = static_cast<kiss_fftr_cfg>(malloc(memneeded));
} else {
if (*lenmem >= memneeded)
st = (kiss_fftr_cfg)mem;
st = static_cast<kiss_fftr_cfg>(mem);
*lenmem = memneeded;
}
if (!st)
return NULL;
return nullptr;
st->substate = (kiss_fft_cfg)(st + 1); /*just beyond kiss_fftr_state struct */
st->tmpbuf = (kiss_fft_cpx *)(((char *)st->substate) + subsize);
st->substate = reinterpret_cast<kiss_fft_cfg>(st + 1); /*just beyond kiss_fftr_state struct */
st->tmpbuf = reinterpret_cast<kiss_fft_cpx *>(reinterpret_cast<char *>(st->substate) + subsize);
st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
for (i = 0; i < nfft / 2; ++i) {
double phase =
-3.14159265358979323846264338327 * ((double)(i + 1) / nfft + .5);
for (int i = 0; i < nfft / 2; ++i) {
double phase = -pi * (static_cast<double>(i + 1) / nfft + .5);
if (inverse_fft)
phase *= -1;
kf_cexp(st->super_twiddles + i, phase);
@ -89,21 +86,19 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem,
return st;
}
void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
kiss_fft_cpx *freqdata) {
void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata, kiss_fft_cpx *freqdata) {
/* input buffer timedata is stored row-wise */
int k, ncfft;
kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
kiss_fft_cpx tdc;
if (st->substate->inverse) {
if (cfg->substate->inverse) {
fprintf(stderr, "kiss fft usage error: improper alloc\n");
exit(1);
}
ncfft = st->substate->nfft;
int ncfft = cfg->substate->nfft;
/*perform the parallel fft of two real signals packed in real,imag*/
kiss_fft(st->substate, (const kiss_fft_cpx *)timedata, st->tmpbuf);
kiss_fft(cfg->substate, reinterpret_cast<const kiss_fft_cpx *>(timedata), cfg->tmpbuf);
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
* contains the sum of the even-numbered elements of the input time sequence
* The imag part is the sum of the odd-numbered elements
@ -115,29 +110,30 @@ void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
* yielding Nyquist bin of input time sequence
*/
tdc.r = st->tmpbuf[0].r;
tdc.i = st->tmpbuf[0].i;
tdc.r = cfg->tmpbuf[0].r;
tdc.i = cfg->tmpbuf[0].i;
C_FIXDIV(tdc, 2);
CHECK_OVERFLOW_OP(tdc.r, +, tdc.i);
CHECK_OVERFLOW_OP(tdc.r, -, tdc.i);
freqdata[0].r = tdc.r + tdc.i;
freqdata[ncfft].r = tdc.r - tdc.i;
#ifdef USE_SIMD
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
#else
freqdata[ncfft].i = freqdata[0].i = 0;
#endif
for (k = 1; k <= ncfft / 2; ++k) {
fpk = st->tmpbuf[k];
fpnk.r = st->tmpbuf[ncfft - k].r;
fpnk.i = -st->tmpbuf[ncfft - k].i;
for (int k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fpnk, f1k, f2k, tw;
const kiss_fft_cpx fpk = cfg->tmpbuf[k];
fpnk.r = cfg->tmpbuf[ncfft - k].r;
fpnk.i = -cfg->tmpbuf[ncfft - k].i;
C_FIXDIV(fpk, 2);
C_FIXDIV(fpnk, 2);
C_ADD(f1k, fpk, fpnk);
C_SUB(f2k, fpk, fpnk);
C_MUL(tw, f2k, st->super_twiddles[k - 1]);
C_MUL(tw, f2k, cfg->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF(f1k.r + tw.r);
freqdata[k].i = HALF_OF(f1k.i + tw.i);
@ -146,25 +142,23 @@ void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
}
}
void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx *freqdata,
kiss_fft_scalar *timedata) {
void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata) {
/* input buffer timedata is stored row-wise */
int k, ncfft;
if (st->substate->inverse == 0) {
if (cfg->substate->inverse == 0) {
fprintf(stderr, "kiss fft usage error: improper alloc\n");
exit(1);
}
ncfft = st->substate->nfft;
int ncfft = cfg->substate->nfft;
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
cfg->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
cfg->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
C_FIXDIV(st->tmpbuf[0], 2);
for (k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fk, fnkc, fek, fok, tmp;
fk = freqdata[k];
for (int k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fnkc, fek, fok, tmp;
const kiss_fft_cpx fk = freqdata[k];
fnkc.r = freqdata[ncfft - k].r;
fnkc.i = -freqdata[ncfft - k].i;
C_FIXDIV(fk, 2);
@ -172,14 +166,14 @@ void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx *freqdata,
C_ADD(fek, fk, fnkc);
C_SUB(tmp, fk, fnkc);
C_MUL(fok, tmp, st->super_twiddles[k - 1]);
C_ADD(st->tmpbuf[k], fek, fok);
C_SUB(st->tmpbuf[ncfft - k], fek, fok);
C_MUL(fok, tmp, cfg->super_twiddles[k - 1]);
C_ADD(cfg->tmpbuf[k], fek, fok);
C_SUB(cfg->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
#else
st->tmpbuf[ncfft - k].i *= -1;
cfg->tmpbuf[ncfft - k].i *= -1;
#endif
}
kiss_fft(st->substate, st->tmpbuf, (kiss_fft_cpx *)timedata);
kiss_fft(cfg->substate, cfg->tmpbuf, reinterpret_cast<kiss_fft_cpx *>(timedata));
}