Update FreeSurround

This commit is contained in:
tygyh 2024-11-24 13:27:33 +01:00
parent 401d6e70f6
commit e95b62c36b
10 changed files with 1856 additions and 1605 deletions

View File

@ -1,5 +1,5 @@
if (NOT MSVC) if (NOT MSVC)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
endif() endif()

View File

@ -15,14 +15,12 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/ */
#pragma once
#ifndef CHANNELMAPS_H
#define CHANNELMAPS_H
#include "FreeSurroundDecoder.h" #include "FreeSurroundDecoder.h"
#include <map> #include <map>
#include <vector>
const int grid_res = 21; // resolution of the lookup grid constexpr int grid_res = 21; // resolution of the lookup grid
// channel allocation maps (per setup) // channel allocation maps (per setup)
typedef std::vector<std::vector<float *>> alloc_lut; typedef std::vector<std::vector<float *>> alloc_lut;
@ -32,5 +30,3 @@ extern std::map<unsigned, std::vector<float>> chn_angle;
extern std::map<unsigned, std::vector<float>> chn_xsf; extern std::map<unsigned, std::vector<float>> chn_xsf;
extern std::map<unsigned, std::vector<float>> chn_ysf; extern std::map<unsigned, std::vector<float>> chn_ysf;
extern std::map<unsigned, std::vector<channel_id>> chn_id; extern std::map<unsigned, std::vector<channel_id>> chn_id;
#endif

View File

@ -14,9 +14,8 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA. // USA.
#pragma once
#ifndef FREESURROUND_DECODER_H
#define FREESURROUND_DECODER_H
#include "KissFFTR.h" #include "KissFFTR.h"
#include <complex> #include <complex>
#include <vector> #include <vector>
@ -52,12 +51,10 @@ typedef enum channel_id {
// of channels that are present. Here is a graphic of the cs_5point1 setup: // of channels that are present. Here is a graphic of the cs_5point1 setup:
// http://en.wikipedia.org/wiki/File:5_1_channels_(surround_sound)_label.svg // http://en.wikipedia.org/wiki/File:5_1_channels_(surround_sound)_label.svg
typedef enum channel_setup { typedef enum channel_setup {
cs_5point1 = ci_front_left | ci_front_center | ci_front_right | ci_back_left | cs_5point1 = ci_front_left | ci_front_center | ci_front_right | ci_back_left | ci_back_right | ci_lfe,
ci_back_right | ci_lfe,
cs_7point1 = ci_front_left | ci_front_center | ci_front_right | cs_7point1 = ci_front_left | ci_front_center | ci_front_right | ci_side_center_left | ci_side_center_right |
ci_side_center_left | ci_side_center_right | ci_back_left | ci_back_left | ci_back_right | ci_lfe
ci_back_right | ci_lfe
} channel_setup; } channel_setup;
// The FreeSurround decoder. // The FreeSurround decoder.
@ -68,15 +65,15 @@ public:
// @param setup The output channel setup -- determines the number of output // @param setup The output channel setup -- determines the number of output
// channels and their place in the sound field. // channels and their place in the sound field.
// @param blocksize Granularity at which data is processed by the decode() // @param blocksize Granularity at which data is processed by the decode()
// function. Must be a power of two and should correspond to ca. 10ms worth // function. Must be a power of two and should correspond to ca. 10 ms worth
// of single-channel samples (default is 4096 for 44.1Khz data). Do not make // of single-channel samples (default is 4096 for 44.1Khz data). Do not make
// it shorter or longer than 5ms to 20ms since the granularity at which // it shorter or longer than 5 ms to 20 ms since the granularity at which
// locations are decoded changes with this. // locations are decoded changes with this.
DPL2FSDecoder(); DPL2FSDecoder();
~DPL2FSDecoder(); ~DPL2FSDecoder();
void Init(channel_setup setup = cs_5point1, unsigned int blocksize = 4096, void Init(channel_setup chsetup = cs_5point1, unsigned int blocksize = 4096, unsigned int sample_rate = 48000);
unsigned int samplerate = 48000);
// Decode a chunk of stereo sound. The output is delayed by half of the // Decode a chunk of stereo sound. The output is delayed by half of the
// blocksize. This function is the only one needed for straightforward // blocksize. This function is the only one needed for straightforward
@ -86,7 +83,7 @@ public:
// @return A pointer to an internal buffer of exactly blocksize (multiplexed) // @return A pointer to an internal buffer of exactly blocksize (multiplexed)
// multichannel samples. The actual number of values depends on the number of // multichannel samples. The actual number of values depends on the number of
// output channels in the chosen channel setup. // output channels in the chosen channel setup.
float *decode(float *input); float *decode(const float *input);
// Flush the internal buffer. // Flush the internal buffer.
void flush(); void flush();
@ -94,22 +91,30 @@ public:
// set soundfield & rendering parameters // set soundfield & rendering parameters
// for more information, see full FreeSurround source code // for more information, see full FreeSurround source code
void set_circular_wrap(float v); void set_circular_wrap(float v);
void set_shift(float v); void set_shift(float v);
void set_depth(float v); void set_depth(float v);
void set_focus(float v); void set_focus(float v);
void set_center_image(float v); void set_center_image(float v);
void set_front_separation(float v); void set_front_separation(float v);
void set_rear_separation(float v); void set_rear_separation(float v);
void set_low_cutoff(float v); void set_low_cutoff(float v);
void set_high_cutoff(float v); void set_high_cutoff(float v);
void set_bass_redirection(bool v); void set_bass_redirection(bool v);
// number of samples currently held in the buffer // number of samples currently held in the buffer
unsigned int buffered(); [[nodiscard]] unsigned int buffered() const;
private: private:
// constants // constants
const float pi = 3.141592654f;
const float epsilon = 0.000001f; const float epsilon = 0.000001f;
// number of samples per input/output block, number of output channels // number of samples per input/output block, number of output channels
@ -175,35 +180,45 @@ private:
// the signal to be constructed in every channel, in the frequency domain // the signal to be constructed in every channel, in the frequency domain
// instantiate the decoder with a given channel setup and processing block // instantiate the decoder with a given channel setup and processing block
// size (in samples) // size (in samples)
std::vector<std::vector<cplx>> signal; std::vector<std::vector<cplx> > signal;
// helper functions // helper functions
inline float sqr(double x); static inline float sqr(double x);
inline double amplitude(const cplx &x);
inline double phase(const cplx &x); static inline double amplitude(const cplx &x);
inline cplx polar(double a, double p);
inline float min(double a, double b); static inline double phase(const cplx &x);
inline float max(double a, double b);
inline float clamp(double x); static inline cplx polar(double a, double p);
inline float sign(double x);
static inline float min(double a, double b);
static inline float max(double a, double b);
static inline float clamp(double x);
static inline float sign(double x);
// get the distance of the soundfield edge, along a given angle // get the distance of the soundfield edge, along a given angle
inline double edgedistance(double a); static inline double edgedistance(double a);
// get the index (and fractional offset!) in a piecewise-linear channel // get the index (and fractional offset!) in a piecewise-linear channel
// allocation grid // allocation grid
int map_to_grid(double &x); static int map_to_grid(double &x);
// decode a block of data and overlap-add it into outbuf // decode a block of data and overlap-add it into outbuf
void buffered_decode(float *input); void buffered_decode(const float *input);
// transform amp/phase difference space into x/y soundfield space // transform amp/phase difference space into x/y soundfield space
void transform_decode(double a, double p, double &x, double &y); static std::tuple<double, double> transform_decode(double amp, double phase);
static float calculate_x(double amp, double phase);
static float calculate_y(double amp, double phase);
// apply a circular_wrap transformation to some position // apply a circular_wrap transformation to some position
void transform_circular_wrap(double &x, double &y, double refangle); static void transform_circular_wrap(double &x, double &y, double refangle);
// apply a focus transformation to some position // apply a focus transformation to some position
void transform_focus(double &x, double &y, double focus); static void transform_focus(double &x, double &y, double focus);
}; };
#endif

View File

@ -1,10 +1,13 @@
#ifndef KISS_FFT_H #pragma once
#define KISS_FFT_H
#include <math.h> #include <cmath>
#include <stdio.h> #include <cstring>
#include <stdlib.h> #include <cstdlib>
#include <string.h> #include <numbers>
using std::abs;
using std::sqrt;
using std::numbers::pi;
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -51,8 +54,8 @@ extern "C" {
#endif #endif
typedef struct { typedef struct {
kiss_fft_scalar r; kiss_fft_scalar r;
kiss_fft_scalar i; kiss_fft_scalar i;
} kiss_fft_cpx; } kiss_fft_cpx;
typedef struct kiss_fft_state *kiss_fft_cfg; typedef struct kiss_fft_state *kiss_fft_cfg;
@ -60,9 +63,9 @@ typedef struct kiss_fft_state *kiss_fft_cfg;
/* /*
* kiss_fft_alloc * kiss_fft_alloc
* *
* Initialize a FFT (or IFFT) algorithm's cfg/state buffer. * Initialize an FFT (or IFFT) algorithm's cfg/state buffer.
* *
* typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
* *
* The return value from fft_alloc is a cfg buffer used internally * The return value from fft_alloc is a cfg buffer used internally
* by the fft routine or NULL. * by the fft routine or NULL.
@ -76,13 +79,12 @@ typedef struct kiss_fft_state *kiss_fft_cfg;
* then the function places the cfg in mem and the size used in *lenmem * then the function places the cfg in mem and the size used in *lenmem
* and returns mem. * and returns mem.
* *
* If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), * If lenmem is not NULL and (mem is NULL or *lenmem is not large enough),
* then the function returns NULL and places the minimum cfg * then the function returns NULL and places the minimum cfg
* buffer size in *lenmem. * buffer size in *lenmem.
* */ * */
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem);
size_t *lenmem);
/* /*
* kiss_fft(cfg,in_out_buf) * kiss_fft(cfg,in_out_buf)
@ -100,8 +102,7 @@ void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
A more generic version of the above function. It reads its input from every Nth A more generic version of the above function. It reads its input from every Nth
sample. sample.
* */ * */
void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int fin_stride);
kiss_fft_cpx *fout, int fin_stride);
/* If kiss_fft_alloc allocated a buffer, it is one contiguous /* If kiss_fft_alloc allocated a buffer, it is one contiguous
buffer and can be simply free()d when no longer needed*/ buffer and can be simply free()d when no longer needed*/
@ -121,11 +122,8 @@ void kiss_fft_cleanup(void);
int kiss_fft_next_fast_size(int n); int kiss_fft_next_fast_size(int n);
/* for real ffts, we need an even size */ /* for real ffts, we need an even size */
#define kiss_fftr_next_fast_size_real(n) \ #define kiss_fftr_next_fast_size_real(n) (kiss_fft_next_fast_size(((n) + 1) >> 1) << 1)
(kiss_fft_next_fast_size(((n) + 1) >> 1) << 1)
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif

View File

@ -1,5 +1,4 @@
#ifndef KISS_FTR_H #pragma once
#define KISS_FTR_H
#include "KissFFT.h" #include "KissFFT.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -17,23 +16,23 @@ extern "C" {
typedef struct kiss_fftr_state *kiss_fftr_cfg; typedef struct kiss_fftr_state *kiss_fftr_cfg;
kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem, kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem);
size_t *lenmem);
/* /*
nfft must be even nfft must be even
If you don't care to allocate space, use mem = lenmem = NULL If you don't care to allocate space, use mem = lenmem = NULL
*/ */
void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata, void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata, kiss_fft_cpx *freqdata);
kiss_fft_cpx *freqdata);
/* /*
input timedata has nfft scalar points input timedata has nfft scalar points
output freqdata has nfft/2+1 complex points output freqdata has nfft/2+1 complex points
*/ */
void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata, void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata);
kiss_fft_scalar *timedata);
/* /*
input freqdata has nfft/2+1 complex points input freqdata has nfft/2+1 complex points
output timedata has nfft scalar points output timedata has nfft scalar points
@ -44,4 +43,3 @@ void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata,
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif

View File

@ -40,11 +40,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
defines kiss_fft_scalar as either short or a float type defines kiss_fft_scalar as either short or a float type
and defines and defines
typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
#pragma once
#include "KissFFT.h" #include "KissFFT.h"
#include <limits.h>
#define MAXFACTORS 32 #define MAXFACTORS 32
/* e.g. an fft of length 128 has 4 factors /* e.g., a fft of length 128 has 4 factors
as far as kissfft is concerned as far as kissfft is concerned
4*4*4*2 4*4*4*2
*/ */
@ -79,13 +79,11 @@ struct kiss_fft_state {
#define SAMP_MIN -SAMP_MAX #define SAMP_MIN -SAMP_MAX
#if defined(CHECK_OVERFLOW) #if defined(CHECK_OVERFLOW)
#define CHECK_OVERFLOW_OP(a, op, b) \ #define CHECK_OVERFLOW_OP(a, op, b) \
if ((SAMPPROD)(a)op(SAMPPROD)(b) > SAMP_MAX || \ if ((SAMPPROD)(a)op(SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a)op(SAMPPROD)(b) < SAMP_MIN) { \
(SAMPPROD)(a)op(SAMPPROD)(b) < SAMP_MIN) { \ fprintf(stderr, "WARNING:overflow @ " __FILE__ "(%d): (%d " #op " %d) = %ld\n", __LINE__, (a), (b), \
fprintf(stderr, \ (SAMPPROD)(a)op(SAMPPROD)(b)); \
"WARNING:overflow @ " __FILE__ "(%d): (%d " #op " %d) = %ld\n", \ }
__LINE__, (a), (b), (SAMPPROD)(a)op(SAMPPROD)(b)); \
}
#endif #endif
#define smul(a, b) ((SAMPPROD)(a) * (b)) #define smul(a, b) ((SAMPPROD)(a) * (b))
@ -93,75 +91,84 @@ struct kiss_fft_state {
#define S_MUL(a, b) sround(smul(a, b)) #define S_MUL(a, b) sround(smul(a, b))
#define C_MUL(m, a, b) \ #define C_MUL(m, a, b) \
do { \ do { \
(m).r = sround(smul((a).r, (b).r) - smul((a).i, (b).i)); \ (m).r = sround(smul((a).r, (b).r) - smul((a).i, (b).i)); \
(m).i = sround(smul((a).r, (b).i) + smul((a).i, (b).r)); \ (m).i = sround(smul((a).r, (b).i) + smul((a).i, (b).r)); \
} while (0) } \
while (0)
#define DIVSCALAR(x, k) (x) = sround(smul(x, SAMP_MAX / k)) #define DIVSCALAR(x, k) (x) = sround(smul(x, SAMP_MAX / k))
#define C_FIXDIV(c, div) \ #define C_FIXDIV(c, div) \
do { \ do { \
DIVSCALAR((c).r, div); \ DIVSCALAR((c).r, div); \
DIVSCALAR((c).i, div); \ DIVSCALAR((c).i, div); \
} while (0) } \
while (0)
#define C_MULBYSCALAR(c, s) \ #define C_MULBYSCALAR(c, s) \
do { \ do { \
(c).r = sround(smul((c).r, s)); \ (c).r = sround(smul((c).r, s)); \
(c).i = sround(smul((c).i, s)); \ (c).i = sround(smul((c).i, s)); \
} while (0) } \
while (0)
#else /* not FIXED_POINT*/ #else /* not FIXED_POINT*/
#define S_MUL(a, b) ((a) * (b)) #define S_MUL(a, b) ((a) * (b))
#define C_MUL(m, a, b) \ #define C_MUL(m, a, b) \
do { \ do { \
(m).r = (a).r * (b).r - (a).i * (b).i; \ (m).r = (a).r * (b).r - (a).i * (b).i; \
(m).i = (a).r * (b).i + (a).i * (b).r; \ (m).i = (a).r * (b).i + (a).i * (b).r; \
} while (0) } \
while (0)
#define C_FIXDIV(c, div) /* NOOP */ #define C_FIXDIV(c, div) /* NOOP */
#define C_MULBYSCALAR(c, s) \ #define C_MULBYSCALAR(c, s) \
do { \ do { \
(c).r *= (s); \ (c).r *= (s); \
(c).i *= (s); \ (c).i *= (s); \
} while (0) } \
while (0)
#endif #endif
#ifndef CHECK_OVERFLOW_OP #ifndef CHECK_OVERFLOW_OP
#define CHECK_OVERFLOW_OP(a, op, b) /* noop */ #define CHECK_OVERFLOW_OP(a, op, b) /* noop */
#endif #endif
#define C_ADD(res, a, b) \ #define C_ADD(res, a, b) \
do { \ do { \
CHECK_OVERFLOW_OP((a).r, +, (b).r) \ CHECK_OVERFLOW_OP((a).r, +, (b).r) \
CHECK_OVERFLOW_OP((a).i, +, (b).i) \ CHECK_OVERFLOW_OP((a).i, +, (b).i) \
(res).r = (a).r + (b).r; \ (res).r = (a).r + (b).r; \
(res).i = (a).i + (b).i; \ (res).i = (a).i + (b).i; \
} while (0) } \
#define C_SUB(res, a, b) \ while (0)
do { \ #define C_SUB(res, a, b) \
CHECK_OVERFLOW_OP((a).r, -, (b).r) \ do { \
CHECK_OVERFLOW_OP((a).i, -, (b).i) \ CHECK_OVERFLOW_OP((a).r, -, (b).r) \
(res).r = (a).r - (b).r; \ CHECK_OVERFLOW_OP((a).i, -, (b).i) \
(res).i = (a).i - (b).i; \ (res).r = (a).r - (b).r; \
} while (0) (res).i = (a).i - (b).i; \
#define C_ADDTO(res, a) \ } \
do { \ while (0)
CHECK_OVERFLOW_OP((res).r, +, (a).r) \ #define C_ADDTO(res, a) \
CHECK_OVERFLOW_OP((res).i, +, (a).i) \ do { \
(res).r += (a).r; \ CHECK_OVERFLOW_OP((res).r, +, (a).r) \
(res).i += (a).i; \ CHECK_OVERFLOW_OP((res).i, +, (a).i) \
} while (0) (res).r += (a).r; \
(res).i += (a).i; \
} \
while (0)
#define C_SUBFROM(res, a) \ #define C_SUBFROM(res, a) \
do { \ do { \
CHECK_OVERFLOW_OP((res).r, -, (a).r) \ CHECK_OVERFLOW_OP((res).r, -, (a).r) \
CHECK_OVERFLOW_OP((res).i, -, (a).i) \ CHECK_OVERFLOW_OP((res).i, -, (a).i) \
(res).r -= (a).r; \ (res).r -= (a).r; \
(res).i -= (a).i; \ (res).i -= (a).i; \
} while (0) } \
while (0)
#ifdef FIXED_POINT #ifdef FIXED_POINT
#define KISS_FFT_COS(phase) floor(.5 + SAMP_MAX * cos(phase)) #define KISS_FFT_COS(phase) floor(.5 + SAMP_MAX * cos(phase))
@ -170,28 +177,28 @@ struct kiss_fft_state {
#elif defined(USE_SIMD) #elif defined(USE_SIMD)
#define KISS_FFT_COS(phase) _mm_set1_ps(cos(phase)) #define KISS_FFT_COS(phase) _mm_set1_ps(cos(phase))
#define KISS_FFT_SIN(phase) _mm_set1_ps(sin(phase)) #define KISS_FFT_SIN(phase) _mm_set1_ps(sin(phase))
#define HALF_OF(x) ((x)*_mm_set1_ps(.5)) #define HALF_OF(x) ((x) * _mm_set1_ps(.5))
#else #else
#define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) #define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
#define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) #define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
#define HALF_OF(x) ((x)*.5) #define HALF_OF(x) ((x) * .5)
#endif #endif
#define kf_cexp(x, phase) \ #define kf_cexp(x, phase) \
do { \ do { \
(x)->r = KISS_FFT_COS(phase); \ (x)->r = KISS_FFT_COS(phase); \
(x)->i = KISS_FFT_SIN(phase); \ (x)->i = KISS_FFT_SIN(phase); \
} while (0) } \
while (0)
/* a debugging function */ /* a debugging function */
#define pcpx(c) \ #define pcpx(c) fprintf(stderr, "%g + %gi\n", (double)((c)->r), (double)((c)->i)) \
fprintf(stderr, "%g + %gi\n", (double)((c)->r), (double)((c)->i))
#ifdef KISS_FFT_USE_ALLOCA #ifdef KISS_FFT_USE_ALLOCA
// define this to allow use of alloca instead of malloc for temporary buffers // Define this to allow use of alloca instead of malloc for temporary buffers.
// Temporary buffers are used in two case: // Temporary buffers are used in two cases:
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 // 1. FFT sizes that have "bad" factors, i.e., not 2, 3 or 5
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an // 2. "In-place" FFTs. Notice the quotes, since kissfft does not really do an
// in-place transform. // in-place transform.
#include <alloca.h> #include <alloca.h>
#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) #define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,6 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/ */
#include "FreeSurround/FreeSurroundDecoder.h"
#include "FreeSurround/ChannelMaps.h" #include "FreeSurround/ChannelMaps.h"
#include <cmath> #include <cmath>
@ -35,65 +34,65 @@ DPL2FSDecoder::~DPL2FSDecoder() {
kiss_fftr_free(inverse); kiss_fftr_free(inverse);
} }
void DPL2FSDecoder::Init(channel_setup chsetup, unsigned int blsize, void DPL2FSDecoder::Init(const channel_setup chsetup, const unsigned int blocksize, const unsigned int sample_rate) {
unsigned int sample_rate) { if (initialized)
if (!initialized) { return;
setup = chsetup;
N = blsize;
samplerate = sample_rate;
// Initialize the parameters setup = chsetup;
wnd = std::vector<double>(N); N = blocksize;
inbuf = std::vector<float>(3 * N); samplerate = sample_rate;
lt = std::vector<double>(N);
rt = std::vector<double>(N);
dst = std::vector<double>(N);
lf = std::vector<cplx>(N / 2 + 1);
rf = std::vector<cplx>(N / 2 + 1);
forward = kiss_fftr_alloc(N, 0, 0, 0);
inverse = kiss_fftr_alloc(N, 1, 0, 0);
C = static_cast<unsigned int>(chn_alloc[setup].size());
// Allocate per-channel buffers // Initialize the parameters
outbuf.resize((N + N / 2) * C); wnd = std::vector<double>(N);
signal.resize(C, std::vector<cplx>(N)); inbuf = std::vector<float>(3 * N);
lt = std::vector<double>(N);
rt = std::vector<double>(N);
dst = std::vector<double>(N);
lf = std::vector<cplx>(N / 2 + 1);
rf = std::vector<cplx>(N / 2 + 1);
forward = kiss_fftr_alloc(N, 0, nullptr, nullptr);
inverse = kiss_fftr_alloc(N, 1, nullptr, nullptr);
C = static_cast<unsigned int>(chn_alloc[setup].size());
// Init the window function // Allocate per-channel buffers
for (unsigned int k = 0; k < N; k++) outbuf.resize((N + N / 2) * C);
wnd[k] = sqrt(0.5 * (1 - cos(2 * pi * k / N)) / N); signal.resize(C, std::vector<cplx>(N));
// set default parameters // Init the window function
set_circular_wrap(90); for (unsigned int k = 0; k < N; k++)
set_shift(0); wnd[k] = sqrt(0.5 * (1 - cos(2 * pi * k / N)) / N);
set_depth(1);
set_focus(0);
set_center_image(1);
set_front_separation(1);
set_rear_separation(1);
set_low_cutoff(40.0f / samplerate * 2);
set_high_cutoff(90.0f / samplerate * 2);
set_bass_redirection(false);
initialized = true; // set default parameters
} set_circular_wrap(90);
set_shift(0);
set_depth(1);
set_focus(0);
set_center_image(1);
set_front_separation(1);
set_rear_separation(1);
set_low_cutoff(40.0f / static_cast<float>(samplerate) * 2);
set_high_cutoff(90.0f / static_cast<float>(samplerate) * 2);
set_bass_redirection(false);
initialized = true;
} }
// decode a stereo chunk, produces a multichannel chunk of the same size // decode a stereo chunk, produces a multichannel chunk of the same size
// (lagged) // (lagged)
float *DPL2FSDecoder::decode(float *input) { float *DPL2FSDecoder::decode(const float *input) {
if (initialized) { if (!initialized)
// append incoming data to the end of the input buffer return nullptr;
memcpy(&inbuf[N], &input[0], 8 * N);
// process first and second half, overlapped // append incoming data to the end of the input buffer
buffered_decode(&inbuf[0]); memcpy(&inbuf[N], &input[0], 8 * N);
buffered_decode(&inbuf[N]); // process first and second half, overlapped
// shift last half of the input to the beginning (for overlapping with a buffered_decode(&inbuf[0]);
// future block) buffered_decode(&inbuf[N]);
memcpy(&inbuf[0], &inbuf[2 * N], 4 * N); // shift last half of the input to the beginning (for overlapping with a
buffer_empty = false; // future block)
return &outbuf[0]; memcpy(&inbuf[0], &inbuf[2 * N], 4 * N);
} buffer_empty = false;
return 0; return &outbuf[0];
} }
// flush the internal buffers // flush the internal buffers
@ -104,56 +103,52 @@ void DPL2FSDecoder::flush() {
} }
// number of samples currently held in the buffer // number of samples currently held in the buffer
unsigned int DPL2FSDecoder::buffered() { return buffer_empty ? 0 : N / 2; } unsigned int DPL2FSDecoder::buffered() const { return buffer_empty ? 0 : N / 2; }
// set soundfield & rendering parameters // set soundfield & rendering parameters
void DPL2FSDecoder::set_circular_wrap(float v) { circular_wrap = v; } void DPL2FSDecoder::set_circular_wrap(const float v) { circular_wrap = v; }
void DPL2FSDecoder::set_shift(float v) { shift = v; } void DPL2FSDecoder::set_shift(const float v) { shift = v; }
void DPL2FSDecoder::set_depth(float v) { depth = v; } void DPL2FSDecoder::set_depth(const float v) { depth = v; }
void DPL2FSDecoder::set_focus(float v) { focus = v; } void DPL2FSDecoder::set_focus(const float v) { focus = v; }
void DPL2FSDecoder::set_center_image(float v) { center_image = v; } void DPL2FSDecoder::set_center_image(const float v) { center_image = v; }
void DPL2FSDecoder::set_front_separation(float v) { front_separation = v; } void DPL2FSDecoder::set_front_separation(const float v) { front_separation = v; }
void DPL2FSDecoder::set_rear_separation(float v) { rear_separation = v; } void DPL2FSDecoder::set_rear_separation(const float v) { rear_separation = v; }
void DPL2FSDecoder::set_low_cutoff(float v) { lo_cut = v * (N / 2); } void DPL2FSDecoder::set_low_cutoff(const float v) { lo_cut = v * static_cast<float>(N / 2.0); }
void DPL2FSDecoder::set_high_cutoff(float v) { hi_cut = v * (N / 2); } void DPL2FSDecoder::set_high_cutoff(const float v) { hi_cut = v * static_cast<float>(N / 2.0); }
void DPL2FSDecoder::set_bass_redirection(bool v) { use_lfe = v; } void DPL2FSDecoder::set_bass_redirection(const bool v) { use_lfe = v; }
// helper functions // helper functions
inline float DPL2FSDecoder::sqr(double x) { return static_cast<float>(x * x); } inline float DPL2FSDecoder::sqr(const double x) { return static_cast<float>(x * x); }
inline double DPL2FSDecoder::amplitude(const cplx &x) {
return sqrt(sqr(x.real()) + sqr(x.imag())); inline double DPL2FSDecoder::amplitude(const cplx &x) { return sqrt(sqr(x.real()) + sqr(x.imag())); }
}
inline double DPL2FSDecoder::phase(const cplx &x) { inline double DPL2FSDecoder::phase(const cplx &x) { return atan2(x.imag(), x.real()); }
return atan2(x.imag(), x.real());
} inline cplx DPL2FSDecoder::polar(const double a, const double p) { return cplx(a * cos(p), a * sin(p)); }
inline cplx DPL2FSDecoder::polar(double a, double p) {
return cplx(a * cos(p), a * sin(p)); inline float DPL2FSDecoder::min(const double a, const double b) { return static_cast<float>(a < b ? a : b); }
}
inline float DPL2FSDecoder::min(double a, double b) { inline float DPL2FSDecoder::max(const double a, const double b) { return static_cast<float>(a > b ? a : b); }
return static_cast<float>(a < b ? a : b);
} inline float DPL2FSDecoder::clamp(const double x) { return max(-1, min(1, x)); }
inline float DPL2FSDecoder::max(double a, double b) {
return static_cast<float>(a > b ? a : b); inline float DPL2FSDecoder::sign(const double x) { return static_cast<float>(x < 0 ? -1 : x > 0 ? 1 : 0); }
}
inline float DPL2FSDecoder::clamp(double x) { return max(-1, min(1, x)); }
inline float DPL2FSDecoder::sign(double x) {
return static_cast<float>(x < 0 ? -1 : (x > 0 ? 1 : 0));
}
// get the distance of the soundfield edge, along a given angle // get the distance of the soundfield edge, along a given angle
inline double DPL2FSDecoder::edgedistance(double a) { inline double DPL2FSDecoder::edgedistance(const double a) {
return min(sqrt(1 + sqr(tan(a))), sqrt(1 + sqr(1 / tan(a)))); return min(sqrt(1 + sqr(tan(a))), sqrt(1 + sqr(1 / tan(a))));
} }
// get the index (and fractional offset!) in a piecewise-linear channel // get the index (and fractional offset!) in a piecewise-linear channel
// allocation grid // allocation grid
int DPL2FSDecoder::map_to_grid(double &x) { int DPL2FSDecoder::map_to_grid(double &x) {
double gp = ((x + 1) * 0.5) * (grid_res - 1), const double gp = (x + 1) * 0.5 * (grid_res - 1), i = min(grid_res - 2, floor(gp));
i = min(grid_res - 2, floor(gp));
x = gp - i; x = gp - i;
return static_cast<int>(i); return static_cast<int>(i);
} }
// decode a block of data and overlap-add it into outbuf // decode a block of data and overlap-add it into outbuf
void DPL2FSDecoder::buffered_decode(float *input) { void DPL2FSDecoder::buffered_decode(const float *input) {
// demultiplex and apply window function // demultiplex and apply window function
for (unsigned int k = 0; k < N; k++) { for (unsigned int k = 0; k < N; k++) {
lt[k] = wnd[k] * input[k * 2 + 0]; lt[k] = wnd[k] * input[k * 2 + 0];
@ -161,24 +156,21 @@ void DPL2FSDecoder::buffered_decode(float *input) {
} }
// map into spectral domain // map into spectral domain
kiss_fftr(forward, &lt[0], (kiss_fft_cpx *)&lf[0]); kiss_fftr(forward, &lt[0], reinterpret_cast<kiss_fft_cpx *>(&lf[0]));
kiss_fftr(forward, &rt[0], (kiss_fft_cpx *)&rf[0]); kiss_fftr(forward, &rt[0], reinterpret_cast<kiss_fft_cpx *>(&rf[0]));
// compute multichannel output signal in the spectral domain // compute multichannel output signal in the spectral domain
for (unsigned int f = 1; f < N / 2; f++) { for (unsigned int f = 1; f < N / 2; f++) {
// get Lt/Rt amplitudes & phases // get Lt/Rt amplitudes & phases
double ampL = amplitude(lf[f]), ampR = amplitude(rf[f]); const double ampL = amplitude(lf[f]), ampR = amplitude(rf[f]), phaseL = phase(lf[f]), phaseR = phase(rf[f]);
double phaseL = phase(lf[f]), phaseR = phase(rf[f]); // calculate the amplitude and phase differences
// calculate the amplitude & phase differences const double ampDiff = clamp(ampL + ampR < epsilon ? 0 : (ampR - ampL) / (ampR + ampL));
double ampDiff =
clamp((ampL + ampR < epsilon) ? 0 : (ampR - ampL) / (ampR + ampL));
double phaseDiff = abs(phaseL - phaseR); double phaseDiff = abs(phaseL - phaseR);
if (phaseDiff > pi) if (phaseDiff > pi)
phaseDiff = 2 * pi - phaseDiff; phaseDiff = 2 * pi - phaseDiff;
// decode into x/y soundfield position // decode into x/y soundfield position
double x, y; auto [x, y] = transform_decode(ampDiff, phaseDiff);
transform_decode(ampDiff, phaseDiff, x, y);
// add wrap control // add wrap control
transform_circular_wrap(x, y, circular_wrap); transform_circular_wrap(x, y, circular_wrap);
// add shift control // add shift control
@ -188,42 +180,42 @@ void DPL2FSDecoder::buffered_decode(float *input) {
// add focus control // add focus control
transform_focus(x, y, focus); transform_focus(x, y, focus);
// add crossfeed control // add crossfeed control
x = clamp(x * x = clamp(x * (front_separation * (1 + y) / 2 + rear_separation * (1 - y) / 2));
(front_separation * (1 + y) / 2 + rear_separation * (1 - y) / 2));
// get total signal amplitude // get total signal amplitude
double amp_total = sqrt(ampL * ampL + ampR * ampR); const double amp_total = sqrt(ampL * ampL + ampR * ampR);
// and total L/C/R signal phases // and total L/C/R signal phases
double phase_of[] = { const double phase_of[] = {phaseL, atan2(lf[f].imag() + rf[f].imag(), lf[f].real() + rf[f].real()), phaseR};
phaseL, atan2(lf[f].imag() + rf[f].imag(), lf[f].real() + rf[f].real()),
phaseR};
// compute 2d channel map indexes p/q and update x/y to fractional offsets // compute 2d channel map indexes p/q and update x/y to fractional offsets
// in the map grid // in the map grid
int p = map_to_grid(x), q = map_to_grid(y); const int p = map_to_grid(x), q = map_to_grid(y);
// map position to channel volumes // map position to channel volumes
for (unsigned int c = 0; c < C - 1; c++) { for (unsigned int c = 0; c < C - 1; c++) {
// look up channel map at respective position (with bilinear // look up the channel map at respective position (with bilinear
// interpolation) and build the // interpolation) and build the
// signal // signal
std::vector<float *> &a = chn_alloc[setup][c]; std::vector<float *> &a = chn_alloc[setup][c];
signal[c][f] = polar( signal[c][f] = polar(
amp_total * ((1 - x) * (1 - y) * a[q][p] + x * (1 - y) * a[q][p + 1] + amp_total * ((1 - x) * (1 - y) * a[q][p] +
(1 - x) * y * a[q + 1][p] + x * y * a[q + 1][p + 1]), x * (1 - y) * a[q][p + 1] +
phase_of[1 + static_cast<int>(sign(chn_xsf[setup][c]))]); (1 - x) * y * a[q + 1][p] +
x * y * a[q + 1][p + 1]),
phase_of[1 + static_cast<int>(sign(chn_xsf[setup][c]))]);
} }
// optionally redirect bass // optionally redirect bass
if (use_lfe && f < hi_cut) { if (!use_lfe)
// level of LFE channel according to normalized frequency continue;
double lfe_level = const auto w = static_cast<float>(f);
f < lo_cut ? 1 if (w >= hi_cut)
: 0.5 * (1 + cos(pi * (f - lo_cut) / (hi_cut - lo_cut))); continue;
// assign LFE channel // level of LFE channel according to normalized frequency
signal[C - 1][f] = lfe_level * polar(amp_total, phase_of[1]); double lfe_level = w < lo_cut ? 1 : 0.5 * (1 + cos(pi * (w - lo_cut) / (hi_cut - lo_cut)));
// subtract the signal from the other channels // assign LFE channel
for (unsigned int c = 0; c < C - 1; c++) signal[C - 1][f] = lfe_level * polar(amp_total, phase_of[1]);
signal[c][f] *= (1 - lfe_level); // subtract the signal from the other channels
} for (unsigned int c = 0; c < C - 1; c++)
signal[c][f] *= 1 - lfe_level;
} }
// shift the last 2/3 to the first 2/3 of the output buffer // shift the last 2/3 to the first 2/3 of the output buffer
@ -233,7 +225,7 @@ void DPL2FSDecoder::buffered_decode(float *input) {
// backtransform each channel and overlap-add // backtransform each channel and overlap-add
for (unsigned int c = 0; c < C; c++) { for (unsigned int c = 0; c < C; c++) {
// back-transform into time domain // back-transform into time domain
kiss_fftri(inverse, (kiss_fft_cpx *)&signal[c][0], &dst[0]); kiss_fftri(inverse, reinterpret_cast<kiss_fft_cpx *>(&signal[c][0]), &dst[0]);
// add the result to the last 2/3 of the output buffer, windowed (and // add the result to the last 2/3 of the output buffer, windowed (and
// remultiplex) // remultiplex)
for (unsigned int k = 0; k < N; k++) for (unsigned int k = 0; k < N; k++)
@ -242,39 +234,37 @@ void DPL2FSDecoder::buffered_decode(float *input) {
} }
// transform amp/phase difference space into x/y soundfield space // transform amp/phase difference space into x/y soundfield space
void DPL2FSDecoder::transform_decode(double a, double p, double &x, double &y) { std::tuple<double, double> DPL2FSDecoder::transform_decode(const double amp, const double phase) {
x = clamp(1.0047 * a + 0.46804 * a * p * p * p - 0.2042 * a * p * p * p * p + return std::make_tuple(calculate_x(amp, phase), calculate_y(amp, phase));
0.0080586 * a * p * p * p * p * p * p * p - }
0.0001526 * a * p * p * p * p * p * p * p * p * p * p -
0.073512 * a * a * a * p - 0.2499 * a * a * a * p * p * p * p + float DPL2FSDecoder::calculate_x(const double amp, const double phase) {
0.016932 * a * a * a * p * p * p * p * p * p * p - const double ap3 = amp * pow(phase, 3), ap4 = amp * pow(phase, 4), ap7 = amp * pow(phase, 7),
0.00027707 * a * a * a * p * p * p * p * p * p * p * p * p * p + ap8 = amp * pow(phase, 8), a3p = pow(amp, 3) * phase, a3p4 = pow(amp, 3) * pow(phase, 4),
0.048105 * a * a * a * a * a * p * p * p * p * p * p * p - a3p7 = pow(amp, 3) * pow(phase, 7), a3p12 = pow(amp, 3) * pow(phase, 7),
0.0065947 * a * a * a * a * a * p * p * p * p * p * p * p * p * p * a5p7 = pow(amp, 5) * pow(phase, 7), a5p12 = pow(amp, 5) * pow(phase, 12),
p + a5p15 = pow(amp, 5) * pow(phase, 15), a7p9 = pow(amp, 7) * pow(phase, 9),
0.0016006 * a * a * a * a * a * p * p * p * p * p * p * p * p * p * a7p15 = pow(amp, 7) * pow(phase, 15), a8p16 = pow(amp, 8) * pow(phase, 16);
p * p -
0.0071132 * a * a * a * a * a * a * a * p * p * p * p * p * p * p * return clamp(1.0047 * amp + 0.46804 * ap3 - 0.2042 * ap4 + 0.0080586 * ap7 - 0.0001526 * ap8 - 0.073512 * a3p +
p * p + 0.2499 * a3p4 - 0.016932 * a3p7 + 0.00027707 * a3p12 + 0.048105 * a5p7 - 0.0065947 * a5p12 +
0.0022336 * a * a * a * a * a * a * a * p * p * p * p * p * p * p * 0.0016006 * a5p15 - 0.0071132 * a7p9 + 0.0022336 * a7p15 - 0.0004804 * a8p16);
p * p * p * p - }
0.0004804 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
p * p * p * p * p); float DPL2FSDecoder::calculate_y(const double amp, const double phase) {
y = clamp( const double p2 = pow(phase, 2), p5 = pow(phase, 5), a2p = pow(amp, 2) * phase, a2p6 = pow(amp, 2) * pow(phase, 6),
0.98592 - 0.62237 * p + 0.077875 * p * p - 0.0026929 * p * p * p * p * p + a4p7 = pow(amp, 4) * pow(phase, 7), a8 = pow(amp, 8), a10 = pow(amp, 10);
0.4971 * a * a * p - 0.00032124 * a * a * p * p * p * p * p * p +
9.2491e-006 * a * a * a * a * p * p * p * p * p * p * p * p * p * p + return clamp(0.98592 - 0.62237 * phase + 0.077875 * p2 - 0.0026929 * p5 + 0.4971 * a2p - 0.00032124 * a2p6 +
0.051549 * a * a * a * a * a * a * a * a + 9.2491e-006 * a4p7 + 0.051549 * a8 + 1.0727e-014 * a10);
1.0727e-014 * a * a * a * a * a * a * a * a * a * a);
} }
// apply a circular_wrap transformation to some position // apply a circular_wrap transformation to some position
void DPL2FSDecoder::transform_circular_wrap(double &x, double &y, void DPL2FSDecoder::transform_circular_wrap(double &x, double &y, double refangle) {
double refangle) {
if (refangle == 90) if (refangle == 90)
return; return;
refangle = refangle * pi / 180; refangle = refangle * pi / 180;
double baseangle = 90 * pi / 180; constexpr double baseangle = pi / 2;
// translate into edge-normalized polar coordinates // translate into edge-normalized polar coordinates
double ang = atan2(x, y), len = sqrt(x * x + y * y); double ang = atan2(x, y), len = sqrt(x * x + y * y);
len = len / edgedistance(ang); len = len / edgedistance(ang);
@ -293,15 +283,15 @@ void DPL2FSDecoder::transform_circular_wrap(double &x, double &y,
} }
// apply a focus transformation to some position // apply a focus transformation to some position
void DPL2FSDecoder::transform_focus(double &x, double &y, double focus) { void DPL2FSDecoder::transform_focus(double &x, double &y, const double focus) {
if (focus == 0) if (focus == 0)
return; return;
const double ang = atan2(x, y);
// translate into edge-normalized polar coordinates // translate into edge-normalized polar coordinates
double ang = atan2(x, y), double len = clamp(sqrt(x * x + y * y) / edgedistance(ang));
len = clamp(sqrt(x * x + y * y) / edgedistance(ang));
// apply focus // apply focus
len = focus > 0 ? 1 - pow(1 - len, 1 + focus * 20) : pow(len, 1 - focus * 20); len = focus > 0 ? 1 - pow(1 - len, 1 + focus * 20) : pow(len, 1 - focus * 20);
// back-transform into euclidian soundfield position // back-transform into Euclidean soundfield position
len = len * edgedistance(ang); len = len * edgedistance(ang);
x = clamp(sin(ang) * len); x = clamp(sin(ang) * len);
y = clamp(cos(ang) * len); y = clamp(cos(ang) * len);

View File

@ -37,19 +37,19 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "FreeSurround/_KissFFTGuts.h" #include "FreeSurround/_KissFFTGuts.h"
/* The guts header contains all the multiplication and addition macros that are #include <random>
defined for #include <vector>
fixed or floating point complex numbers. It also delares the kf_ internal
functions. /* The guts header contains all the multiplication and addition macros
that are defined for fixed or floating point complex numbers.
It also declares the kf_ internal functions.
*/ */
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) {
const kiss_fft_cfg st, int m) { const kiss_fft_cpx *tw1 = st->twiddles;
kiss_fft_cpx *Fout2; kiss_fft_cpx *Fout2 = Fout + m;
kiss_fft_cpx *tw1 = st->twiddles;
kiss_fft_cpx t;
Fout2 = Fout + m;
do { do {
kiss_fft_cpx t;
C_FIXDIV(*Fout, 2); C_FIXDIV(*Fout, 2);
C_FIXDIV(*Fout2, 2); C_FIXDIV(*Fout2, 2);
@ -62,17 +62,15 @@ static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
} while (--m); } while (--m);
} }
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) {
const kiss_fft_cfg st, const size_t m) {
kiss_fft_cpx *tw1, *tw2, *tw3; kiss_fft_cpx *tw1, *tw2, *tw3;
kiss_fft_cpx scratch[6];
size_t k = m; size_t k = m;
const size_t m2 = 2 * m; const size_t m2 = 2 * m, m3 = 3 * m;
const size_t m3 = 3 * m;
tw3 = tw2 = tw1 = st->twiddles; tw3 = tw2 = tw1 = st->twiddles;
do { do {
kiss_fft_cpx scratch[6];
C_FIXDIV(*Fout, 4); C_FIXDIV(*Fout, 4);
C_FIXDIV(Fout[m], 4); C_FIXDIV(Fout[m], 4);
C_FIXDIV(Fout[m2], 4); C_FIXDIV(Fout[m2], 4);
@ -107,18 +105,16 @@ static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
} while (--k); } while (--k);
} }
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) {
const kiss_fft_cfg st, size_t m) {
size_t k = m; size_t k = m;
const size_t m2 = 2 * m; const size_t m2 = 2 * m;
kiss_fft_cpx *tw1, *tw2; kiss_fft_cpx *tw1, *tw2;
kiss_fft_cpx scratch[5]; const auto [r, i] = st->twiddles[fstride * m];
kiss_fft_cpx epi3;
epi3 = st->twiddles[fstride * m];
tw1 = tw2 = st->twiddles; tw1 = tw2 = st->twiddles;
do { do {
kiss_fft_cpx scratch[5];
C_FIXDIV(*Fout, 3); C_FIXDIV(*Fout, 3);
C_FIXDIV(Fout[m], 3); C_FIXDIV(Fout[m], 3);
C_FIXDIV(Fout[m2], 3); C_FIXDIV(Fout[m2], 3);
@ -134,7 +130,7 @@ static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
Fout[m].r = Fout->r - HALF_OF(scratch[3].r); Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i); Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
C_MULBYSCALAR(scratch[0], epi3.i); C_MULBYSCALAR(scratch[0], i);
C_ADDTO(*Fout, scratch[3]); C_ADDTO(*Fout, scratch[3]);
@ -148,25 +144,14 @@ static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
} while (--k); } while (--k);
} }
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const int m) {
const kiss_fft_cfg st, int m) { kiss_fft_cpx *Fout0 = Fout, *Fout1 = Fout0 + m, *Fout2 = Fout0 + 2 * m, *Fout3 = Fout0 + 3 * m,
kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; *Fout4 = Fout0 + 4 * m;
int u;
kiss_fft_cpx scratch[13]; kiss_fft_cpx scratch[13];
kiss_fft_cpx *twiddles = st->twiddles; const kiss_fft_cpx *twiddles = st->twiddles, *tw = st->twiddles, ya = twiddles[fstride * m],
kiss_fft_cpx *tw; yb = twiddles[fstride * 2 * m];
kiss_fft_cpx ya, yb;
ya = twiddles[fstride * m];
yb = twiddles[fstride * 2 * m];
Fout0 = Fout; for (int u = 0; u < m; ++u) {
Fout1 = Fout0 + m;
Fout2 = Fout0 + 2 * m;
Fout3 = Fout0 + 3 * m;
Fout4 = Fout0 + 4 * m;
tw = st->twiddles;
for (u = 0; u < m; ++u) {
C_FIXDIV(*Fout0, 5); C_FIXDIV(*Fout0, 5);
C_FIXDIV(*Fout1, 5); C_FIXDIV(*Fout1, 5);
C_FIXDIV(*Fout2, 5); C_FIXDIV(*Fout2, 5);
@ -187,10 +172,8 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
Fout0->r += scratch[7].r + scratch[8].r; Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i; Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r = scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r); scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
scratch[5].i =
scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i); scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i); scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
@ -198,10 +181,8 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
C_SUB(*Fout1, scratch[5], scratch[6]); C_SUB(*Fout1, scratch[5], scratch[6]);
C_ADD(*Fout4, scratch[5], scratch[6]); C_ADD(*Fout4, scratch[5], scratch[6]);
scratch[11].r = scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r); scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
scratch[11].i =
scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i); scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i); scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
@ -217,43 +198,35 @@ static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
} }
/* perform the butterfly for one stage of a mixed radix FFT */ /* perform the butterfly for one stage of a mixed radix FFT */
static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const int m, const int p) {
const kiss_fft_cfg st, int m, int p) { const kiss_fft_cpx *twiddles = st->twiddles;
int u, k, q1, q; const int Norig = st->nfft;
kiss_fft_cpx *twiddles = st->twiddles;
kiss_fft_cpx t;
int Norig = st->nfft;
kiss_fft_cpx *scratch = const auto scratch = static_cast<kiss_fft_cpx *>(KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p));
(kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
for (u = 0; u < m; ++u) { for (int u = 0; u < m; ++u) {
k = u; for (int q1 = 0, i = u; q1 < p; ++q1, i += m) {
for (q1 = 0; q1 < p; ++q1) { scratch[q1] = Fout[i];
scratch[q1] = Fout[k];
C_FIXDIV(scratch[q1], p); C_FIXDIV(scratch[q1], p);
k += m;
} }
k = u; for (int q1 = 0, j = u; q1 < p; ++q1, j += m) {
for (q1 = 0; q1 < p; ++q1) {
int twidx = 0; int twidx = 0;
Fout[k] = scratch[0]; Fout[j] = scratch[0];
for (q = 1; q < p; ++q) { for (int q = 1; q < p; ++q) {
twidx += static_cast<int>(fstride) * k; kiss_fft_cpx t;
twidx += static_cast<int>(fstride) * j;
if (twidx >= Norig) if (twidx >= Norig)
twidx -= Norig; twidx -= Norig;
C_MUL(t, scratch[q], twiddles[twidx]); C_MUL(t, scratch[q], twiddles[twidx]);
C_ADDTO(Fout[k], t); C_ADDTO(Fout[j], t);
} }
k += m;
} }
} }
KISS_FFT_TMP_FREE(scratch); KISS_FFT_TMP_FREE(scratch);
} }
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors,
const size_t fstride, int in_stride, int *factors,
const kiss_fft_cfg st) { const kiss_fft_cfg st) {
kiss_fft_cpx *Fout_beg = Fout; kiss_fft_cpx *Fout_beg = Fout;
const int p = *factors++; /* the radix */ const int p = *factors++; /* the radix */
@ -269,26 +242,25 @@ static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
// execute the p different work units in different threads // execute the p different work units in different threads
#pragma omp parallel for #pragma omp parallel for
for (k = 0; k < p; ++k) for (k = 0; k < p; ++k)
kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, factors, st);
factors, st);
// all threads have joined by this point // all threads have joined by this point
switch (p) { switch (p) {
case 2: case 2:
kf_bfly2(Fout, fstride, st, m); kf_bfly2(Fout, fstride, st, m);
break; break;
case 3: case 3:
kf_bfly3(Fout, fstride, st, m); kf_bfly3(Fout, fstride, st, m);
break; break;
case 4: case 4:
kf_bfly4(Fout, fstride, st, m); kf_bfly4(Fout, fstride, st, m);
break; break;
case 5: case 5:
kf_bfly5(Fout, fstride, st, m); kf_bfly5(Fout, fstride, st, m);
break; break;
default: default:
kf_bfly_generic(Fout, fstride, st, m, p); kf_bfly_generic(Fout, fstride, st, m, p);
break; break;
} }
return; return;
} }
@ -314,54 +286,89 @@ static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
// recombine the p smaller DFTs // recombine the p smaller DFTs
switch (p) { switch (p) {
case 2: case 2:
kf_bfly2(Fout, fstride, st, m); kf_bfly2(Fout, fstride, st, m);
break; break;
case 3: case 3:
kf_bfly3(Fout, fstride, st, m); kf_bfly3(Fout, fstride, st, m);
break; break;
case 4: case 4:
kf_bfly4(Fout, fstride, st, m); kf_bfly4(Fout, fstride, st, m);
break; break;
case 5: case 5:
kf_bfly5(Fout, fstride, st, m); kf_bfly5(Fout, fstride, st, m);
break; break;
default: default:
kf_bfly_generic(Fout, fstride, st, m, p); kf_bfly_generic(Fout, fstride, st, m, p);
break; break;
} }
} }
/* facbuf is populated by p1,m1,p2,m2, ... /**
where * @brief Implements Pollard's Rho algorithm to find a non-trivial factor of n.
p[i] * m[i] = m[i-1] *
m0 = n */ * This function uses Pollard's Rho algorithm, a probabilistic integer factorization method.
static void kf_factor(int n, int *facbuf) { * The algorithm generates a sequence of numbers based on a quadratic recurrence and uses the
int p = 4; * "tortoise and hare" technique to detect cycles. If a cycle is detected, the GCD of the
double floor_sqrt; * difference between two sequence values and n is computed, which yields a factor of n.
floor_sqrt = floor(sqrt((double)n)); *
* If n is even, the function immediately returns 2. Otherwise, the algorithm iterates
* until it finds a non-trivial factor or determines that no factor exists.
*
* @param n The integer to factor. The function will return a non-trivial factor of n.
*
* @return A non-trivial factor of n if found, otherwise 0 if no factor is found.
*/
int pollards_rho(const int n) {
if (n % 2 == 0)
return 2;
/*factor out powers of 4, powers of 2, then any remaining primes */ std::random_device rd;
do { std::mt19937 gen(rd());
while (n % p) { std::uniform_int_distribution dist(1, n - 1);
switch (p) {
case 4: int64_t x = dist(gen);
p = 2; int64_t y = x;
break; const int64_t c = dist(gen);
case 2: int64_t d = 1;
p = 3;
break; while (d == 1) {
default: x = (x * x + c) % n;
p += 2; y = (y * y + c) % n;
break; y = (y * y + c) % n;
} d = std::gcd(std::llabs(x - y), static_cast<int64_t>(n));
if (p > floor_sqrt) }
p = n; /* no more factors, skip to end */
return d == n ? 0 : static_cast<int>(d);
}
/**
* @brief Factorizes a number using Pollard's Rho algorithm.
*
* This function attempts to factor the given number `n` by repeatedly calling the Pollard's Rho algorithm.
* It continuously divides `n` by each found factor and stores the factors in the provided `facbuf` array.
* This method returns the prime factors of `n` and updates the array with factor pairs:
* each factor and the quotient of `n` after dividing by the factor.
*
* The factorization process continues until all factors of `n` are found,
* or until Pollard's Rho fails to find further factors.
*
* @param n The integer to factor. It will be reduced during the process.
* @param facbuf A pointer to an array where the factors will be stored.
* The factors are stored as pairs: each factor and the quotient of n after dividing by the factor.
*/
void kf_factor(int n, int *facbuf) {
while (n > 1) {
const int factor = pollards_rho(n);
if (factor == 0)
break;
while (n % factor == 0) {
n /= factor;
*facbuf++ = factor;
*facbuf++ = n;
} }
n /= p; }
*facbuf++ = p;
*facbuf++ = n;
} while (n > 1);
} }
/* /*
@ -372,73 +379,87 @@ static void kf_factor(int n, int *facbuf) {
* such, * such,
* It can be freed with free(), rather than a kiss_fft-specific function. * It can be freed with free(), rather than a kiss_fft-specific function.
* */ * */
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, kiss_fft_cfg kiss_fft_alloc(const int nfft, const int inverse_fft, void *mem, size_t *lenmem) {
size_t *lenmem) { kiss_fft_cfg st = nullptr;
kiss_fft_cfg st = NULL; const size_t memneeded = sizeof(struct kiss_fft_state) + sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
size_t memneeded = sizeof(struct kiss_fft_state) +
sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
if (lenmem == NULL) { if (lenmem == nullptr) {
st = (kiss_fft_cfg) new char[memneeded]; st = reinterpret_cast<kiss_fft_cfg>(new char[memneeded]);
} else { } else {
if (mem != NULL && *lenmem >= memneeded) if (mem != nullptr && *lenmem >= memneeded)
st = (kiss_fft_cfg)mem; st = static_cast<kiss_fft_cfg>(mem);
*lenmem = memneeded; *lenmem = memneeded;
} }
if (st) { if (!st) {
int i; return st;
st->nfft = nfft;
st->inverse = inverse_fft;
for (i = 0; i < nfft; ++i) {
const double pi =
3.141592653589793238462643383279502884197169399375105820974944;
double phase = -2 * pi * i / nfft;
if (st->inverse)
phase *= -1;
kf_cexp(st->twiddles + i, phase);
}
kf_factor(nfft, st->factors);
} }
st->nfft = nfft;
st->inverse = inverse_fft;
for (int i = 0; i < nfft; ++i) {
double phase = -2 * pi * i / nfft;
if (st->inverse)
phase *= -1;
kf_cexp(st->twiddles + i, phase);
}
kf_factor(nfft, st->factors);
return st; return st;
} }
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, void kiss_fft_stride(const kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, const int fin_stride) {
kiss_fft_cpx *fout, int in_stride) { if (fin != fout) {
if (fin == fout) { kf_work(fout, fin, 1, fin_stride, cfg->factors, cfg);
// NOTE: this is not really an in-place FFT algorithm. return;
// It just performs an out-of-place FFT into a temp buffer
kiss_fft_cpx *tmpbuf =
(kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
KISS_FFT_TMP_FREE(tmpbuf);
} else {
kf_work(fout, fin, 1, in_stride, st->factors, st);
} }
// NOTE: this is not really an in-place FFT algorithm.
// It just performs an out-of-place FFT into a temp buffer
auto *tmpbuf = static_cast<kiss_fft_cpx *>(KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * cfg->nfft));
kf_work(tmpbuf, fin, 1, fin_stride, cfg->factors, cfg);
memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * cfg->nfft);
KISS_FFT_TMP_FREE(tmpbuf);
} }
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) { void kiss_fft(const kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
kiss_fft_stride(cfg, fin, fout, 1); kiss_fft_stride(cfg, fin, fout, 1);
} }
void kiss_fft_cleanup(void) { /**
// nothing needed any more * Finds the next largest integer that can be expressed as a product of
} * the prime factors 2, 3, and 5. This ensures the number is factorable
* by these primes, making it suitable for optimized FFT computations.
*
* @param n The starting integer to search from.
* @return The smallest integer greater than or equal to `n`
* divisible only by the primes 2, 3, and 5.
*/
int kiss_fft_next_fast_size(const int n) {
std::vector hammingNumbers = {1}; // Start with 1 as the smallest Hamming number
int i2 = 0, i3 = 0, i5 = 0; // Pointers for multiples of 2, 3, and 5
int kiss_fft_next_fast_size(int n) { while (true) {
while (1) { // Generate the next candidates by multiplying with 2, 3, and 5
int m = n; int next2 = hammingNumbers[i2] * 2;
while ((m % 2) == 0) int next3 = hammingNumbers[i3] * 3;
m /= 2; int next5 = hammingNumbers[i5] * 5;
while ((m % 3) == 0)
m /= 3; // Find the smallest candidate
while ((m % 5) == 0) int nextHamming = std::min({next2, next3, next5});
m /= 5;
if (m <= 1) // If the candidate is >= n, return it
break; /* n is completely factorable by twos, threes, and fives */ if (nextHamming >= n) {
n++; return nextHamming;
}
// Add the smallest candidate to the list
hammingNumbers.push_back(nextHamming);
// Increment the respective pointer(s)
if (nextHamming == next2)
i2++;
if (nextHamming == next3)
i3++;
if (nextHamming == next5)
i5++;
} }
return n;
} }

View File

@ -38,6 +38,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "FreeSurround/KissFFTR.h" #include "FreeSurround/KissFFTR.h"
#include "FreeSurround/_KissFFTGuts.h" #include "FreeSurround/_KissFFTGuts.h"
#include <cstdio>
struct kiss_fftr_state { struct kiss_fftr_state {
kiss_fft_cfg substate; kiss_fft_cfg substate;
@ -48,40 +49,36 @@ struct kiss_fftr_state {
#endif #endif
}; };
kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem, kiss_fftr_cfg kiss_fftr_alloc(int nfft, const int inverse_fft, void *mem, size_t *lenmem) {
size_t *lenmem) { kiss_fftr_cfg st = nullptr;
int i;
kiss_fftr_cfg st = NULL;
size_t subsize = 65536 * 4, memneeded = 0; size_t subsize = 65536 * 4, memneeded = 0;
if (nfft & 1) { if (nfft & 1) {
fprintf(stderr, "Real FFT optimization must be even.\n"); fprintf(stderr, "Real FFT optimization must be even.\n");
return NULL; return nullptr;
} }
nfft >>= 1; nfft >>= 1;
kiss_fft_alloc(nfft, inverse_fft, NULL, &subsize); kiss_fft_alloc(nfft, inverse_fft, nullptr, &subsize);
memneeded = sizeof(struct kiss_fftr_state) + subsize + memneeded = sizeof(kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
if (lenmem == NULL) { if (lenmem == nullptr) {
st = (kiss_fftr_cfg)malloc(memneeded); st = static_cast<kiss_fftr_cfg>(malloc(memneeded));
} else { } else {
if (*lenmem >= memneeded) if (*lenmem >= memneeded)
st = (kiss_fftr_cfg)mem; st = static_cast<kiss_fftr_cfg>(mem);
*lenmem = memneeded; *lenmem = memneeded;
} }
if (!st) if (!st)
return NULL; return nullptr;
st->substate = (kiss_fft_cfg)(st + 1); /*just beyond kiss_fftr_state struct */ st->substate = reinterpret_cast<kiss_fft_cfg>(st + 1); /*just beyond kiss_fftr_state struct */
st->tmpbuf = (kiss_fft_cpx *)(((char *)st->substate) + subsize); st->tmpbuf = reinterpret_cast<kiss_fft_cpx *>(reinterpret_cast<char *>(st->substate) + subsize);
st->super_twiddles = st->tmpbuf + nfft; st->super_twiddles = st->tmpbuf + nfft;
kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
for (i = 0; i < nfft / 2; ++i) { for (int i = 0; i < nfft / 2; ++i) {
double phase = double phase = -pi * (static_cast<double>(i + 1) / nfft + .5);
-3.14159265358979323846264338327 * ((double)(i + 1) / nfft + .5);
if (inverse_fft) if (inverse_fft)
phase *= -1; phase *= -1;
kf_cexp(st->super_twiddles + i, phase); kf_cexp(st->super_twiddles + i, phase);
@ -89,21 +86,19 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem,
return st; return st;
} }
void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata, void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar *timedata, kiss_fft_cpx *freqdata) {
kiss_fft_cpx *freqdata) {
/* input buffer timedata is stored row-wise */ /* input buffer timedata is stored row-wise */
int k, ncfft; kiss_fft_cpx tdc;
kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
if (st->substate->inverse) { if (cfg->substate->inverse) {
fprintf(stderr, "kiss fft usage error: improper alloc\n"); fprintf(stderr, "kiss fft usage error: improper alloc\n");
exit(1); exit(1);
} }
ncfft = st->substate->nfft; int ncfft = cfg->substate->nfft;
/*perform the parallel fft of two real signals packed in real,imag*/ /*perform the parallel fft of two real signals packed in real,imag*/
kiss_fft(st->substate, (const kiss_fft_cpx *)timedata, st->tmpbuf); kiss_fft(cfg->substate, reinterpret_cast<const kiss_fft_cpx *>(timedata), cfg->tmpbuf);
/* The real part of the DC element of the frequency spectrum in st->tmpbuf /* The real part of the DC element of the frequency spectrum in st->tmpbuf
* contains the sum of the even-numbered elements of the input time sequence * contains the sum of the even-numbered elements of the input time sequence
* The imag part is the sum of the odd-numbered elements * The imag part is the sum of the odd-numbered elements
@ -115,29 +110,30 @@ void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
* yielding Nyquist bin of input time sequence * yielding Nyquist bin of input time sequence
*/ */
tdc.r = st->tmpbuf[0].r; tdc.r = cfg->tmpbuf[0].r;
tdc.i = st->tmpbuf[0].i; tdc.i = cfg->tmpbuf[0].i;
C_FIXDIV(tdc, 2); C_FIXDIV(tdc, 2);
CHECK_OVERFLOW_OP(tdc.r, +, tdc.i); CHECK_OVERFLOW_OP(tdc.r, +, tdc.i);
CHECK_OVERFLOW_OP(tdc.r, -, tdc.i); CHECK_OVERFLOW_OP(tdc.r, -, tdc.i);
freqdata[0].r = tdc.r + tdc.i; freqdata[0].r = tdc.r + tdc.i;
freqdata[ncfft].r = tdc.r - tdc.i; freqdata[ncfft].r = tdc.r - tdc.i;
#ifdef USE_SIMD #ifdef USE_SIMD
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
#else #else
freqdata[ncfft].i = freqdata[0].i = 0; freqdata[ncfft].i = freqdata[0].i = 0;
#endif #endif
for (k = 1; k <= ncfft / 2; ++k) { for (int k = 1; k <= ncfft / 2; ++k) {
fpk = st->tmpbuf[k]; kiss_fft_cpx fpnk, f1k, f2k, tw;
fpnk.r = st->tmpbuf[ncfft - k].r; const kiss_fft_cpx fpk = cfg->tmpbuf[k];
fpnk.i = -st->tmpbuf[ncfft - k].i; fpnk.r = cfg->tmpbuf[ncfft - k].r;
fpnk.i = -cfg->tmpbuf[ncfft - k].i;
C_FIXDIV(fpk, 2); C_FIXDIV(fpk, 2);
C_FIXDIV(fpnk, 2); C_FIXDIV(fpnk, 2);
C_ADD(f1k, fpk, fpnk); C_ADD(f1k, fpk, fpnk);
C_SUB(f2k, fpk, fpnk); C_SUB(f2k, fpk, fpnk);
C_MUL(tw, f2k, st->super_twiddles[k - 1]); C_MUL(tw, f2k, cfg->super_twiddles[k - 1]);
freqdata[k].r = HALF_OF(f1k.r + tw.r); freqdata[k].r = HALF_OF(f1k.r + tw.r);
freqdata[k].i = HALF_OF(f1k.i + tw.i); freqdata[k].i = HALF_OF(f1k.i + tw.i);
@ -146,25 +142,23 @@ void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
} }
} }
void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx *freqdata, void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata) {
kiss_fft_scalar *timedata) {
/* input buffer timedata is stored row-wise */ /* input buffer timedata is stored row-wise */
int k, ncfft;
if (st->substate->inverse == 0) { if (cfg->substate->inverse == 0) {
fprintf(stderr, "kiss fft usage error: improper alloc\n"); fprintf(stderr, "kiss fft usage error: improper alloc\n");
exit(1); exit(1);
} }
ncfft = st->substate->nfft; int ncfft = cfg->substate->nfft;
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; cfg->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; cfg->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
C_FIXDIV(st->tmpbuf[0], 2); C_FIXDIV(st->tmpbuf[0], 2);
for (k = 1; k <= ncfft / 2; ++k) { for (int k = 1; k <= ncfft / 2; ++k) {
kiss_fft_cpx fk, fnkc, fek, fok, tmp; kiss_fft_cpx fnkc, fek, fok, tmp;
fk = freqdata[k]; const kiss_fft_cpx fk = freqdata[k];
fnkc.r = freqdata[ncfft - k].r; fnkc.r = freqdata[ncfft - k].r;
fnkc.i = -freqdata[ncfft - k].i; fnkc.i = -freqdata[ncfft - k].i;
C_FIXDIV(fk, 2); C_FIXDIV(fk, 2);
@ -172,14 +166,14 @@ void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx *freqdata,
C_ADD(fek, fk, fnkc); C_ADD(fek, fk, fnkc);
C_SUB(tmp, fk, fnkc); C_SUB(tmp, fk, fnkc);
C_MUL(fok, tmp, st->super_twiddles[k - 1]); C_MUL(fok, tmp, cfg->super_twiddles[k - 1]);
C_ADD(st->tmpbuf[k], fek, fok); C_ADD(cfg->tmpbuf[k], fek, fok);
C_SUB(st->tmpbuf[ncfft - k], fek, fok); C_SUB(cfg->tmpbuf[ncfft - k], fek, fok);
#ifdef USE_SIMD #ifdef USE_SIMD
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
#else #else
st->tmpbuf[ncfft - k].i *= -1; cfg->tmpbuf[ncfft - k].i *= -1;
#endif #endif
} }
kiss_fft(st->substate, st->tmpbuf, (kiss_fft_cpx *)timedata); kiss_fft(cfg->substate, cfg->tmpbuf, reinterpret_cast<kiss_fft_cpx *>(timedata));
} }