Update SoundTouch to 1.9.0.

The bundled copy claimed to be 1.7.1 but was actually a mixture of files from
various versions. It was hard to update because everything was in the top
directory, so I adopted upstream's way of organizing files. I renamed include to
soundtouch since I did not want to #ifdef that for Windows.
Wavfile.h is a private header so I used the private path instead of
moving the file over. This changed 3 files in the plugin folder.
This commit is contained in:
Miguel A. Colón Vélez 2015-08-22 10:16:27 -04:00
parent 9f2642a714
commit 09c8a41294
52 changed files with 3677 additions and 2606 deletions

View File

/// Win32 version of the AMD 3DNow! optimized routines for AMD K6-2/Athlon
/// processors. All 3DNow! optimized functions have been gathered into this
/// single source code file, regardless of their class or original source code
/// file, in order to ease porting the library to other compiler and processor
/// platforms.
/// By the way; the performance gain depends heavily on the CPU generation: On
/// K6-2 these routines provided speed-up of even 2.4 times, while on Athlon the
/// difference to the original routines stayed at an unremarkable 8%! Such a small
/// improvement on Athlon is because 3DNow! can perform only two operations in
/// parallel, and obviously also the Athlon FPU is doing a very good job with
/// the standard C floating point routines! Here these routines are anyway,
/// although it might not be worth the effort to convert these to GCC platform,
/// for Athlon CPU at least. The situation is different regarding the SSE
/// optimizations though, thanks to the four parallel operations of SSE that
/// already make a difference.
/// This file is to be compiled in Windows platform with Microsoft Visual C++
/// Compiler. Please see '3dnow_gcc.cpp' for the gcc compiler version for all
/// GNU platforms (if file supplied).
/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++
/// 6.0 processor pack" update to support 3DNow! instruction set. The update is
/// available for download at Microsoft Developers Network, see here:
/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and
/// perform a search with keywords "processor pack".
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
#include "cpu_detect.h"
#include "STTypes.h"
#ifndef WIN32
#error "wrong platform - this source code file is exclusively for Win32 platform"
using namespace soundtouch;
#ifdef ALLOW_3DNOW
// 3DNow! routines available only with float sample type
// implementation of 3DNow! optimized functions of class 'TDStretch3DNow'
#include "TDStretch.h"
// Calculates cross correlation of two buffers
/// Computes the cross-correlation (sum of element-wise products) of the float
/// buffers 'pV1' and 'pV2' using 3DNow! instructions.
///
/// The assembly loop counter is 'overlapLength / 4', and each round consumes
/// 32 bytes (8 floats) from each buffer.
///
/// \param pV1 first sample buffer
/// \param pV2 second sample buffer
/// \return the accumulated correlation value 'corr'
double TDStretch3DNow::calcCrossCorrStereo(const float *pV1, const float *pV2) const
int overlapLengthLocal = overlapLength;
float corr = 0;
// NOTE(review): the plain C loop below looks like a reference version of the
// assembly that follows. As shown, only the first product is added to 'corr'
// and the other seven are bare expressions - confirm it is meant to be
// commented out in the real file.
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
corr = 0;
for (i = 0; i < overlapLength / 4; i ++)
corr += pV1[0] * pV2[0];
pV1[1] * pV2[1];
pV1[2] * pV2[2];
pV1[3] * pV2[3];
pV1[4] * pV2[4];
pV1[5] * pV2[5];
pV1[6] * pV2[6];
pV1[7] * pV2[7];
pV1 += 8;
pV2 += 8;
// give prefetch hints to CPU of what data are to be needed soonish.
// give more aggressive hints on pV1 as that changes more between different calls
// while pV2 stays the same.
prefetch [pV1]
prefetch [pV2]
prefetch [pV1 + 32]
// register setup: eax = pV2, ebx = pV1, mm0 = running sum (cleared),
// ecx = number of loop rounds
mov eax, dword ptr pV2
mov ebx, dword ptr pV1
pxor mm0, mm0
mov ecx, overlapLengthLocal
shr ecx, 2 // div by four
// loop body: four 8-byte multiply-accumulate steps per round
movq mm1, [eax]
prefetch [eax + 32] // give a prefetch hint to CPU what data are to be needed soonish
pfmul mm1, [ebx]
prefetch [ebx + 64] // give a prefetch hint to CPU what data are to be needed soonish
movq mm2, [eax + 8]
pfadd mm0, mm1
pfmul mm2, [ebx + 8]
movq mm3, [eax + 16]
pfadd mm0, mm2
pfmul mm3, [ebx + 16]
movq mm4, [eax + 24]
pfadd mm0, mm3
pfmul mm4, [ebx + 24]
add eax, 32
pfadd mm0, mm4
add ebx, 32
dec ecx
jnz loop1
// add halves of mm0 together and return the result.
// note: mm1 is used as a dummy parameter only, we actually don't care about its value
pfacc mm0, mm1
movd corr, mm0
return corr;
// implementation of 3DNow! optimized functions of class 'FIRFilter'
#include "FIRFilter.h"
/// Constructs the filter with no coefficient storage yet: both the raw
/// allocation pointer and the aligned pointer start out as NULL.
FIRFilter3DNow::FIRFilter3DNow() : FIRFilter()
filterCoeffsUnalign = NULL;
filterCoeffsAlign = NULL;
delete[] filterCoeffsUnalign;
filterCoeffsUnalign = NULL;
filterCoeffsAlign = NULL;
// (overloaded) Calculates filter coefficients for 3DNow! routine
/// Stores the coefficients through the base class, then builds a private,
/// 16-byte aligned copy in which every coefficient is divided by
/// 'resultDivider' and written to two adjacent slots.
///
/// \param coeffs           source coefficients, 'newLength' entries
/// \param newLength        number of coefficients
/// \param uResultDivFactor forwarded unchanged to FIRFilter::setCoefficients
void FIRFilter3DNow::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor)
uint i;
float fDivider;
FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
// Scale the filter coefficients so that it won't be necessary to scale the filtering result
// also rearrange coefficients suitably for 3DNow!
// Ensure that filter coeffs array is aligned to 16-byte boundary
delete[] filterCoeffsUnalign;
// two slots per coefficient, plus 4 floats (16 bytes) of slack for alignment
filterCoeffsUnalign = new float[2 * newLength + 4];
// NOTE(review): the pointer is cast through 'uint', which would truncate it
// where pointers are wider than 'uint'; this file rejects non-WIN32 builds
// via #error - confirm it is never built for 64-bit targets.
filterCoeffsAlign = (float *)(((uint)filterCoeffsUnalign + 15) & (uint)-16);
fDivider = (float)resultDivider;
// rearrange the filter coefficients for the 3DNow! routine: each scaled
// coefficient is stored twice, at indices 2*i and 2*i+1
for (i = 0; i < newLength; i ++)
filterCoeffsAlign[2 * i + 0] =
filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider;
// 3DNow!-optimized version of the filter routine for stereo sound
/// 3DNow! FIR filter routine operating on float sample buffers.
///
/// 'count' is '(numSamples - length)' rounded down to an even number. The
/// outer assembly loop counter is 'count / 2'; each round stores 16 bytes to
/// 'dest' and advances 'src' by 16 bytes. The inner loop counter is
/// 'length / 4' and walks the coefficients in 'filterCoeffsAlign'.
///
/// NOTE(review): 'numSamples - length' is unsigned, so it would wrap if
/// numSamples < length - confirm callers guarantee numSamples >= length.
///
/// \param dest       output sample buffer
/// \param src        input sample buffer
/// \param numSamples number of input samples available in 'src'
/// \return 'count' as described above
uint FIRFilter3DNow::evaluateFilterStereo(float *dest, const float *src, uint numSamples) const
float *filterCoeffsLocal = filterCoeffsAlign;
uint count = (numSamples - length) & (uint)-2;
uint lengthLocal = length / 4;
assert(length != 0);
assert(count % 2 == 0);
/* original code:
double suml1, suml2;
double sumr1, sumr2;
uint i, j;
for (j = 0; j < count; j += 2)
const float *ptr;
suml1 = sumr1 = 0.0;
suml2 = sumr2 = 0.0;
ptr = src;
filterCoeffsLocal = filterCoeffs;
for (i = 0; i < lengthLocal; i ++)
// unroll loop for efficiency.
suml1 += ptr[0] * filterCoeffsLocal[0] +
ptr[2] * filterCoeffsLocal[2] +
ptr[4] * filterCoeffsLocal[4] +
ptr[6] * filterCoeffsLocal[6];
sumr1 += ptr[1] * filterCoeffsLocal[1] +
ptr[3] * filterCoeffsLocal[3] +
ptr[5] * filterCoeffsLocal[5] +
ptr[7] * filterCoeffsLocal[7];
suml2 += ptr[8] * filterCoeffsLocal[0] +
ptr[10] * filterCoeffsLocal[2] +
ptr[12] * filterCoeffsLocal[4] +
ptr[14] * filterCoeffsLocal[6];
sumr2 += ptr[9] * filterCoeffsLocal[1] +
ptr[11] * filterCoeffsLocal[3] +
ptr[13] * filterCoeffsLocal[5] +
ptr[15] * filterCoeffsLocal[7];
ptr += 16;
filterCoeffsLocal += 8;
dest[0] = (float)suml1;
dest[1] = (float)sumr1;
dest[2] = (float)suml2;
dest[3] = (float)sumr2;
src += 4;
dest += 4;
mov eax, dword ptr dest
mov ebx, dword ptr src
mov edx, count
shr edx, 1
// "outer loop" : during each round 2*2 output samples are calculated
prefetch [ebx] // give a prefetch hint to CPU what data are to be needed soonish
prefetch [filterCoeffsLocal] // give a prefetch hint to CPU what data are to be needed soonish
mov esi, ebx
mov edi, filterCoeffsLocal
pxor mm0, mm0
pxor mm1, mm1
mov ecx, lengthLocal
// "inner loop" : during each round four FIR filter taps are evaluated for 2*2 output samples
movq mm2, [edi]
movq mm3, mm2
prefetch [edi + 32] // give a prefetch hint to CPU what data are to be needed soonish
pfmul mm2, [esi]
prefetch [esi + 32] // give a prefetch hint to CPU what data are to be needed soonish
pfmul mm3, [esi + 8]
movq mm4, [edi + 8]
movq mm5, mm4
pfadd mm0, mm2
pfmul mm4, [esi + 8]
pfadd mm1, mm3
pfmul mm5, [esi + 16]
movq mm2, [edi + 16]
movq mm6, mm2
pfadd mm0, mm4
pfmul mm2, [esi + 16]
pfadd mm1, mm5
pfmul mm6, [esi + 24]
movq mm3, [edi + 24]
movq mm7, mm3
pfadd mm0, mm2
pfmul mm3, [esi + 24]
pfadd mm1, mm6
pfmul mm7, [esi + 32]
add esi, 32
pfadd mm0, mm3
add edi, 32
pfadd mm1, mm7
dec ecx
jnz loop2
movq [eax], mm0
add ebx, 16
movq [eax + 8], mm1
add eax, 16
dec edx
jnz loop1
return count;
#endif // ALLOW_3DNOW

3rdparty/soundtouch/COPYING.TXT vendored Normal file
View File

@ -8,15 +8,13 @@
<h2>5. Change History</h2>
<h3>5.1. SoundTouch library Change History </h3>
thus choose the proper version to allow the program start.</p>
<h3>2.2. Building in Gnu platforms</h3>
<p>The SoundTouch library compiles in practically any platform
supporting GNU compiler (GCC) tools. SoundTouch requires GCC version 4.3 or later.</p>
@ -92,7 +102,9 @@ Notice that "configure" file is not available before running the
<pre>make -</pre>
<p>Builds the SoundTouch library &amp; SoundStretch utility.</p>
<p>Builds the SoundTouch library &amp; SoundStretch utility. You can
optionally add &quot;-j&quot; switch after &quot;make&quot; to speed up the compilation in
multi-core systems.</p>
<tr valign="top">
@ -133,7 +145,7 @@ directly and remove the following definition:<blockquote>
<h4><b>2.2.3 Compiling Shared Library / DLL version</b></h4>
<h4><b>2.2.3 Compiling Shared Library / DLL version in Cygwin</b></h4>
The GNU compilation does not automatically create a shared-library version of
SoundTouch (.so or .dll). If such is desired, then you can create it as follows
@ -147,7 +159,15 @@ sstrip SoundTouch.dll</pre>
<h3>2.1. Building in Android</h3>
<p>Android compilation instructions are within the
source code package, see file &quot;<b>source/Android-lib/README-SoundTouch-Android.html</b>&quot;
in the package.</p>
in the source code package. </p>
<p>The Android compilation automatically builds separate .so library binaries
for ARM, X86 and MIPS processor architectures. For optimal device support,
include all these .so library binaries into the Android .apk application
package, so the target Android device can automatically choose the proper
library binary version to use.</p>
<p>The <strong>source/Android-lib</strong> folder includes also an Android
example application that processes WAV audio files using SoundTouch library in
Android devices.</p>
<h2>3. About implementation &amp; Usage tips</h2>
<h3>3.1. Supported sample data formats</h3>
@ -343,28 +363,55 @@ function with parameter&nbsp; id of SETTING_USE_QUICKSEEK and value
<p>setSetting(SETTING_USE_QUICKSEEK, 1);</p>
<p><strong>CPU-specific optimizations:</strong></p>
<p>Intel x86 specific SIMD optimizations are implemented using compiler
intrinsics, providing about a 3x processing speedup for x86 compatible
processors vs. non-SIMD implementation:</p>
<li> Intel MMX optimized routines are used with compatible CPUs when
16bit integer sample type is used. MMX optimizations are available both
in Win32 and Gnu/x86 platforms. Compatible processors are Intel
PentiumMMX and later; AMD K6-2, Athlon and later. </li>
<li> Intel SSE optimized routines are used with compatible CPUs when
floating point sample type is used. SSE optimizations are currently
implemented for Win32 platform only. Processors compatible with SSE
extension are Intel processors starting from Pentium-III, and AMD
processors starting from Athlon XP. </li>
<li> AMD 3DNow! optimized routines are used with compatible CPUs when
floating point sample type is used, but SSE extension isn't supported .
3DNow! optimizations are currently implemented for Win32 platform only.
These optimizations are used in AMD K6-2 and Athlon (classic) CPU's;
better performing SSE routines are used with AMD processor starting
from Athlon XP. </li>
<li> Intel MMX optimized routines are used with x86 CPUs when 16bit integer
sample type is used</li>
<li> Intel SSE optimized routines are used with x86 CPUs when 32bit floating
point sample type is used</li>
<h3>3.5 OpenMP parallel computation</h3>
<p>SoundTouch 1.9 onwards supports running the algorithms in parallel on several CPU
cores. Based on benchmarks, the measured multi-core processing speed-up
ranges from +30% (on a high-spec dual-core x86 Windows PC) to +215% (on a moderately low-spec
quad-core ARM of Raspberry Pi2).</p>
<p>The parallel computing support is implemented using OpenMP spec 3.0
instructions. These instructions are supported by Visual C++ 2008 and later, and
GCC v4.2 and later. Compilers that do not support OpenMP will ignore these
optimizations and routines will still work properly. Possible warnings about
unknown #pragmas are related to OpenMP support and can be safely ignored.</p>
<p>The OpenMP improvements are disabled by default, and need to be enabled by
developer during compile-time. Reason for this is that parallel processing adds
moderate runtime overhead in managing the multi-threading, so it may not be
necessary nor desirable in all applications. For example real-time processing
that is not constrained by CPU power will not benefit from the speed-up provided by
parallel processing; on the contrary, it may increase power consumption due
to the increased overhead.</p>
<p>However, applications that run on low-spec multi-core CPUs and may otherwise
have constrained performance will benefit from the OpenMP improvements.
This includes, for example, multi-core embedded devices.</p>
<p>OpenMP parallel computation can be enabled before compiling SoundTouch
library as follows:</p>
<li><strong>Visual Studio</strong>: Open properties for the <strong>SoundTouch
</strong>sub-project, browse to <strong>C/C++</strong> and <strong>Language
</strong>settings. Set
there &quot;<strong>OpenMP support</strong>&quot; to &quot;<strong>Yes</strong>&quot;. Alternatively add
<strong>/openmp</strong> switch to command-line</li>
<li><strong>GNU</strong>: Run the configure script with &quot;<strong>./configure
--enable-openmp</strong>&quot; switch, then run make as usual</li>
<li><strong>Android</strong>: Add &quot;<strong>-fopenmp</strong>&quot; switches to compiler &amp; linker
options, see README-SoundTouch-Android.html in the source code package for
more detailed instructions.</li>
<h2><a name="SoundStretch"></a>4. SoundStretch audio processing utility
<p>SoundStretch audio processing utility<br>
Copyright (c) Olli Parviainen 2002-2012</p>
Copyright (c) Olli Parviainen 2002-2015</p>
<p>SoundStretch is a simple command-line application that can change
tempo, pitch and playback rates of WAV sound files. This program is
intended primarily to demonstrate how the "SoundTouch" library can be
@ -510,9 +557,42 @@ and estimates the BPM rate:</p>
<pre>soundstretch stdin -bpm</pre>
<p><strong>Example 6</strong></p>
<p>The following command tunes song from original 440Hz tuning to 432Hz tuning:
this corresponds to lowering the pitch by -0.318 semitones:</p>
<pre>soundstretch original.wav output.wav -pitch=-0.318</pre>
<h2>5. Change History</h2>
<h3>5.1. SoundTouch library Change History </h3>
<li>Added support for parallel computation via OpenMP primitives for better performance in multicore systems.
Benchmarks show that the achieved parallel processing speedup
typically ranges from +30% (x86 dual-core) to +180% (ARM quad-core). The
OpenMP optimizations are disabled by default; see the OpenMP notes above in this
readme file for how to enable these optimizations.</li>
<li>Android: Added support for Android devices featuring X86 and MIPS CPUs,
in addition to ARM CPUs.</li>
<li>Android: More versatile Android example application that processes WAV
audio files with SoundTouch library</li>
<li>Replaced Windows-like 'BOOL' types with native 'bool'</li>
<li>Changed documentation token to "dist_doc_DATA" in Makefile.am file</li>
<li>Miscellaneous small fixes and improvements</li>
<li>Added support for multi-channel audio processing</li>
<li>Added support for <b>cubic</b> and <b>shannon</b> interpolation for rate and pitch shift effects besides
the original <b>linear</b> interpolation, to reduce aliasing at high frequencies due to interpolation.
Cubic interpolation is used as default for floating point processing, and linear interpolation for integer
<li>Fixed bug in anti-alias filtering that limited stop-band attenuation to -10 dB instead of <-50dB, and
increased filter length from 32 to 64 taps to further reduce aliasing due to frequency folding.</li>
<li>Performance improvements in cross-correlation algorithm</li>
<li>Other bug and compatibility fixes</li>
<li>Added files for Android compilation
@ -660,6 +740,11 @@ accessing the FIFOSampleBuffer class from external files. </li>
<h3>5.2. SoundStretch application Change History </h3>
<li>Added support for WAV file 'fact' information chunk.</li>
<li>Bugfixes in Wavfile: exception string formatting, avoid getLengthMs() integer
@ -718,40 +803,37 @@ switch "-bpm" </li>
<h2>6. Acknowledgements </h2>
<p>Kudos for these people who have contributed to development or
submitted bugfixes since SoundTouch v1.3.1: </p>
submitted bugfixes:</p>
<li> Arthur A</li>
<li> Richard Ash</li>
<li> Stanislav Brabec</li>
<li> Christian Budde</li>
<li> Chris Bryan</li>
<li> Jacek Caban</li>
<li> Brian Cameron</li>
<li> Jason Champion</li>
<li> David Clark</li>
<li> Patrick Colis</li>
<li> Miquel Colon</li>
<li> Justin Frankel</li>
<li> Jason Garland</li>
<li> Takashi Iwai</li>
<li> Yuval Naveh</li>
<li> Paulo Pizarro</li>
<li> Blaise Potard</li>
<li> RJ Ryan</li>
<li> Patrick Colis </li>
<li> Miquel Colon </li>
<li> Jim Credland</li>
<li> Sandro Cumerlato</li>
<li> Justin Frankel</li>
<li> Masa H.</li>
<li> Jason Garland</li>
<li> Takashi Iwai</li>
<li> Thomas Klausner</li>
<li> Mathias Möhl</li>
<li> Yuval Naveh</li>
<li> Paulo Pizarro</li>
<li> Blaise Potard</li>
<li> Michael Pruett</li>
<li> Rajeev Puran</li>
<li> RJ Ryan</li>
<li> John Sheehy</li>
<li> Tim Shuttleworth</li>
<li> Albert Sirvent</li>
<li> John Stumpo</li>
<li> Tim Shuttleworth</li>
<li> Katja Vetter</li>
<h2>7. LICENSE</h2>
@ -770,8 +852,8 @@ General Public License for more details.</p>
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p>
$Id: README.html 168 2012-12-28 20:55:19Z oparviai $ -->
$Id: README.html 220 2015-05-18 17:39:26Z oparviai $ -->
<i>RREADME.html file updated on 28-Dec-2012</i></p>
<i>README.html file updated in May-2015</i></p>

/// Verifies that the 'fmt ' and 'data' tags in the parsed header match the
/// expected four-character codes.
///
/// \return 0 if both tags match, -1 otherwise
int WavInFile::checkCharTags() const
// header.format.fmt should equal to 'fmt '
if (memcmp(fmtStr, header.format.fmt, 4) != 0) return -1;
// header.data.data_field should equal to 'data'
if (memcmp(dataStr, header.data.data_field, 4) != 0) return -1;
return 0;
int WavInFile::read(char *buffer, int maxElems)
int numBytes;
uint afterDataRead;
// ensure it's 8 bit format
if (header.format.bits_per_sample != 8)
throw runtime_error("Error: WavInFile::read(char*, int) works only with 8bit samples.");
assert(sizeof(char) == 1);
numBytes = maxElems;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
// Don't read more samples than are marked available in header
numBytes = (int)header.data.data_len - (int)dataRead;
assert(numBytes >= 0);
numBytes = fread(buffer, 1, numBytes, fptr);
dataRead += numBytes;
return numBytes;
int WavInFile::read(short *buffer, int maxElems)
unsigned int afterDataRead;
int numBytes;
int numElems;
if (header.format.bits_per_sample == 8)
// 8 bit format
char *temp = new char[maxElems];
int i;
numElems = read(temp, maxElems);
// convert from 8 to 16 bit
for (i = 0; i < numElems; i ++)
buffer[i] = temp[i] << 8;
delete[] temp;
// 16 bit format
assert(header.format.bits_per_sample == 16);
assert(sizeof(short) == 2);
numBytes = maxElems * 2;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
// Don't read more samples than are marked available in header
numBytes = (int)header.data.data_len - (int)dataRead;
assert(numBytes >= 0);
numBytes = fread(buffer, 1, numBytes, fptr);
dataRead += numBytes;
numElems = numBytes / 2;
// 16bit samples, swap byte order if necessary
_swap16Buffer((unsigned short *)buffer, numElems);
return numElems;
/// Reads samples as floats by first reading them as 16-bit integers into a
/// temporary buffer and then scaling each value by 1/32768.
///
/// \param buffer   destination buffer with room for at least 'maxElems' floats
/// \param maxElems maximum number of samples to read
/// \return number of samples actually read
int WavInFile::read(float *buffer, int maxElems)
// temporary 16-bit staging buffer, released before returning
short *temp = new short[maxElems];
int num;
int i;
double fscale;
num = read(temp, maxElems);
fscale = 1.0 / 32768.0;
// convert to floats, scale to range [-1..+1[
for (i = 0; i < num; i ++)
buffer[i] = (float)(fscale * (double)temp[i]);
delete[] temp;
return num;
/// Tells whether there is no more sample data to read.
///
/// \return nonzero if 'dataRead' equals the data length from the header or
///         the underlying stream reports end-of-file
int WavInFile::eof() const
// return true if all data has been read or file eof has been reached
return (dataRead == header.data.data_len || feof(fptr));
/// Tests whether the character code lies between a white space ' ' and
/// lower-case 'z' (both inclusive).
///
/// Despite the name this is not an alphabetic test: digits, punctuation and
/// upper-case letters within that range are accepted too.
///
/// \param c character to test
/// \return 1 if 'c' is within [' ', 'z'], otherwise 0
static int isAlpha(char c)
{
    return (c >= ' ' && c <= 'z') ? 1 : 0;
}
// test if all characters are between a white space ' ' and little 'z'
static int isAlphaStr(const char *str)
char c;
c = str[0];
while (c)
if (isAlpha(c) == 0) return 0;
str ++;
c = str[0];
return 1;
/// Reads the RIFF block from the start of the stream into 'header.riff' and
/// validates its 'RIFF' and 'WAVE' tags.
///
/// \return 0 on success, -1 if the block can't be read or a tag doesn't match
int WavInFile::readRIFFBlock()
if (fread(&(header.riff), sizeof(WavRiff), 1, fptr) != 1) return -1;
// swap 32bit data byte order if necessary
_swap32((unsigned int &)header.riff.package_len);
// header.riff.riff_char should equal to 'RIFF'
if (memcmp(riffStr, header.riff.riff_char, 4) != 0) return -1;
// header.riff.wave should equal to 'WAVE'
if (memcmp(waveStr, header.riff.wave, 4) != 0) return -1;
return 0;
int WavInFile::readHeaderBlock()
char label[5];
string sLabel;
// lead label string
if (fread(label, 1, 4, fptr) !=4) return -1;
label[4] = 0;
if (isAlphaStr(label) == 0) return -1; // not a valid label
// Decode blocks according to their label
if (strcmp(label, fmtStr) == 0)
int nLen, nDump;
// 'fmt ' block
memcpy(header.format.fmt, fmtStr, 4);
// read length of the format field
if (fread(&nLen, sizeof(int), 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32((unsigned int &)nLen); // int format_len;
header.format.format_len = nLen;
// calculate how much length differs from expected
nDump = nLen - ((int)sizeof(header.format) - 8);
// if format_len is larger than expected, read only as much data as we've space for
if (nDump > 0)
nLen = sizeof(header.format) - 8;
// read data
if (fread(&(header.format.fixed), nLen, 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap16((unsigned short &)header.format.fixed); // short int fixed;
_swap16((unsigned short &)header.format.channel_number); // short int channel_number;
_swap32((unsigned int &)header.format.sample_rate); // int sample_rate;
_swap32((unsigned int &)header.format.byte_rate); // int byte_rate;
_swap16((unsigned short &)header.format.byte_per_sample); // short int byte_per_sample;
_swap16((unsigned short &)header.format.bits_per_sample); // short int bits_per_sample;
// if format_len is larger than expected, skip the extra data
if (nDump > 0)
fseek(fptr, nDump, SEEK_CUR);
return 0;
else if (strcmp(label, dataStr) == 0)
// 'data' block
memcpy(header.data.data_field, dataStr, 4);
if (fread(&(header.data.data_len), sizeof(uint), 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32((unsigned int &)header.data.data_len);
return 1;
uint len, i;
uint temp;
// unknown block
// read length
if (fread(&len, sizeof(len), 1, fptr) != 1) return -1;
// scan through the block
for (i = 0; i < len; i ++)
if (fread(&temp, 1, 1, fptr) != 1) return -1;
if (feof(fptr)) return -1; // unexpected eof
return 0;
/// Parses the WAV headers: clears 'header', reads the RIFF block, then reads
/// header blocks until readHeaderBlock() returns nonzero.
///
/// NOTE(review): the opening of the loop that pairs with the
/// '} while (res == 0);' line is not visible in this extract.
///
/// \return 0 if the headers were read and the tags are valid, nonzero on
///         failure (1 for a read/structure error, otherwise the result of
///         checkCharTags())
int WavInFile::readWavHeaders()
int res;
memset(&header, 0, sizeof(header));
res = readRIFFBlock();
if (res) return 1;
// read header blocks until data block is found
// read header blocks
res = readHeaderBlock();
if (res < 0) return 1; // error in file structure
} while (res == 0);
// check that all required tags are legal
return checkCharTags();
/// Returns the 'channel_number' field of the format header.
uint WavInFile::getNumChannels() const
return header.format.channel_number;
/// Returns the 'bits_per_sample' field of the format header.
uint WavInFile::getNumBits() const
return header.format.bits_per_sample;
/// Returns channels * bits-per-sample / 8, i.e. the byte size of one sample
/// across all channels.
uint WavInFile::getBytesPerSample() const
return getNumChannels() * getNumBits() / 8;
/// Returns the 'sample_rate' field of the format header.
uint WavInFile::getSampleRate() const
return header.format.sample_rate;
/// Returns the 'data_len' field of the data chunk header.
uint WavInFile::getDataSizeInBytes() const
return header.data.data_len;
/// Returns the data length divided by the header's 'byte_per_sample' value.
///
/// \return number of samples, or 0 if 'byte_per_sample' is zero
uint WavInFile::getNumSamples() const
// guard against division by zero on a header with no sample size
if (header.format.byte_per_sample == 0) return 0;
return header.data.data_len / (unsigned short)header.format.byte_per_sample;
/// Returns the duration of the sample data in milliseconds.
///
/// \return 1000 * numSamples / sampleRate, truncated; 0 if the sample rate is
///         zero; UINT_MAX if the result does not fit into 'uint'
uint WavInFile::getLengthMS() const
{
    const uint sampleRate = getSampleRate();
    unsigned long long lengthMs;

    // a zero sample rate (e.g. missing or corrupt format header) would
    // otherwise cause a division by zero
    if (sampleRate == 0) return 0;

    // 64-bit intermediate so that '1000 * numSamples' can't wrap around
    lengthMs = (unsigned long long)getNumSamples() * 1000 / sampleRate;
    if (lengthMs > UINT_MAX) lengthMs = UINT_MAX;

    return (uint)lengthMs;
}
// Class WavOutFile
/// Opens 'fileName' for binary writing and prepares the WAV header fields.
///
/// \param fileName   path of the output file
/// \param sampleRate sample rate passed on to fillInHeader()
/// \param bits       bits per sample passed on to fillInHeader()
/// \param channels   channel count passed on to fillInHeader()
/// \throws runtime_error if the file can't be opened
WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int channels)
bytesWritten = 0;
fptr = fopen(fileName, "wb");
if (fptr == NULL)
string msg = "Error : Unable to open file \"";
msg += fileName;
msg += "\" for writing.";
//pmsg = msg.c_str;
throw runtime_error(msg);
fillInHeader(sampleRate, bits, channels);
/// Uses an already-open stream as the output and prepares the WAV header
/// fields.
///
/// \param file       output stream; must not be NULL
/// \param sampleRate sample rate passed on to fillInHeader()
/// \param bits       bits per sample passed on to fillInHeader()
/// \param channels   channel count passed on to fillInHeader()
/// \throws runtime_error if 'file' is NULL
WavOutFile::WavOutFile(FILE *file, int sampleRate, int bits, int channels)
bytesWritten = 0;
fptr = file;
if (fptr == NULL)
string msg = "Error : Unable to access output file stream.";
throw runtime_error(msg);
fillInHeader(sampleRate, bits, channels);
if (fptr) fclose(fptr);
fptr = NULL;
/// Initializes the in-memory WAV header for the given format. The length
/// fields (package_len, data_len) are set to 0 here.
///
/// \param sampleRate sample rate stored in the header
/// \param bits       bits per sample
/// \param channels   number of channels
void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
// fill in the 'riff' part..
// copy string 'RIFF' to riff_char
memcpy(&(header.riff.riff_char), riffStr, 4);
// package_len unknown so far
header.riff.package_len = 0;
// copy string 'WAVE' to wave
memcpy(&(header.riff.wave), waveStr, 4);
// fill in the 'format' part..
// copy string 'fmt ' to fmt
memcpy(&(header.format.fmt), fmtStr, 4);
header.format.format_len = 0x10;
header.format.fixed = 1;
header.format.channel_number = (short)channels;
header.format.sample_rate = (int)sampleRate;
header.format.bits_per_sample = (short)bits;
// bytes for one sample across all channels
header.format.byte_per_sample = (short)(bits * channels / 8);
header.format.byte_rate = header.format.byte_per_sample * (int)sampleRate;
// NOTE(review): sample_rate was already assigned above; this second
// assignment is redundant.
header.format.sample_rate = (int)sampleRate;
// fill in the 'data' part..
// copy string 'data' to data_field
memcpy(&(header.data.data_field), dataStr, 4);
// data_len unknown so far
header.data.data_len = 0;
/// Updates the header's length fields from the number of bytes written:
/// 'data_len' is the byte count and 'package_len' is that count plus 36.
void WavOutFile::finishHeader()
// supplement the file length into the header structure
header.riff.package_len = bytesWritten + 36;
header.data.data_len = bytesWritten;
/// Writes the header to the beginning of the file and then moves the file
/// position back to the end.
///
/// A byte-swapped copy is written so that the in-memory 'header' stays in
/// host byte order.
///
/// \throws runtime_error if the header can't be written
void WavOutFile::writeHeader()
WavHeader hdrTemp;
int res;
// swap byte order if necessary
hdrTemp = header;
_swap32((unsigned int &)hdrTemp.riff.package_len);
_swap32((unsigned int &)hdrTemp.format.format_len);
_swap16((unsigned short &)hdrTemp.format.fixed);
_swap16((unsigned short &)hdrTemp.format.channel_number);
_swap32((unsigned int &)hdrTemp.format.sample_rate);
_swap32((unsigned int &)hdrTemp.format.byte_rate);
_swap16((unsigned short &)hdrTemp.format.byte_per_sample);
_swap16((unsigned short &)hdrTemp.format.bits_per_sample);
_swap32((unsigned int &)hdrTemp.data.data_len);
// write the supplemented header in the beginning of the file
fseek(fptr, 0, SEEK_SET);
res = fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);
if (res != 1)
throw runtime_error("Error while writing to a wav file.");
// jump back to the end of the file
fseek(fptr, 0, SEEK_END);
/// Writes 8-bit samples to the file as they are.
///
/// \param buffer   samples to write
/// \param numElems number of samples (bytes) in 'buffer'
/// \throws runtime_error if the file is not in 8-bit format or the write
///         comes up short
void WavOutFile::write(const char *buffer, int numElems)
int res;
if (header.format.bits_per_sample != 8)
throw runtime_error("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
assert(sizeof(char) == 1);
res = fwrite(buffer, 1, numElems, fptr);
if (res != numElems)
throw runtime_error("Error while writing to a wav file.");
// keep track of the payload size for the header length fields
bytesWritten += numElems;
void WavOutFile::write(const short *buffer, int numElems)
int res;
// 16 bit samples
if (numElems < 1) return; // nothing to do
if (header.format.bits_per_sample == 8)
int i;
char *temp = new char[numElems];
// convert from 16bit format to 8bit format
for (i = 0; i < numElems; i ++)
temp[i] = buffer[i] >> 8;
// write in 8bit format
write(temp, numElems);
delete[] temp;
// 16bit format
unsigned short *pTemp = new unsigned short[numElems];
assert(header.format.bits_per_sample == 16);
// allocate temp buffer to swap byte order if necessary
memcpy(pTemp, buffer, numElems * 2);
_swap16Buffer(pTemp, numElems);
res = fwrite(pTemp, 2, numElems, fptr);
delete[] pTemp;
if (res != numElems)
throw runtime_error("Error while writing to a wav file.");
bytesWritten += 2 * numElems;
void WavOutFile::write(const float *buffer, int numElems)
int i;
short *temp = new short[numElems];
int iTemp;
// convert to 16 bit integer
for (i = 0; i < numElems; i ++)
// convert to integer
iTemp = (int)(32768.0f * buffer[i]);
// saturate
if (iTemp < -32768) iTemp = -32768;
if (iTemp > 32767) iTemp = 32767;
temp[i] = (short)iTemp;
write(temp, numElems);
delete[] temp;

test edx, 0x02000000
jz test3DNow ; branch if SSE not available
or esi, SUPPORT_SSE ; otherwise add SSE support bit
; test for presence of AMD extensions
mov eax, 0x80000000
cmp eax, 0x80000000
jbe end ; branch if no AMD extensions detected
; test for presence of 3DNow! extension
mov eax, 0x80000001
test edx, 0x80000000
jz end ; branch if 3DNow! not detected
or esi, SUPPORT_3DNOW ; otherwise add 3DNow support bit
mov res, esi
// Visual C++ 64bit compilation doesn't support inline assembler. However,
// all x64 compatible CPUs support MMX & SSE extensions.
return res & ~_dwDisabledISA;

View File

View File

View File

View File

@ -26,7 +26,7 @@
// Last changed : $Date: 2012-08-30 16:53:44 -0300 (qui, 30 ago 2012) $
// Last changed : $Date: 2012-08-30 19:53:44 +0000 (Thu, 30 Aug 2012) $
// File revision : $Revision: 4 $
// $Id: BPMDetect.h 150 2012-08-30 19:53:44Z oparviai $

View File

@ -15,10 +15,10 @@
// Last changed : $Date: 2012-06-13 16:29:53 -0300 (qua, 13 jun 2012) $
// Last changed : $Date: 2014-01-05 21:40:22 +0000 (Sun, 05 Jan 2014) $
// File revision : $Revision: 4 $
// $Id: FIFOSampleBuffer.h 143 2012-06-13 19:29:53Z oparviai $
// $Id: FIFOSampleBuffer.h 177 2014-01-05 21:40:22Z oparviai $
@ -162,6 +162,12 @@ public:
/// Sets number of channels, 1 = mono, 2 = stereo.
void setChannels(int numChannels);
/// Get number of channels
int getChannels()
return channels;
/// Returns nonzero if there aren't any samples available for outputting.
virtual int isEmpty() const;

View File

@ -17,7 +17,7 @@
// Last changed : $Date: 2012-06-13 16:29:53 -0300 (qua, 13 jun 2012) $
// Last changed : $Date: 2012-06-13 19:29:53 +0000 (Wed, 13 Jun 2012) $
// File revision : $Revision: 4 $
// $Id: FIFOSamplePipe.h 143 2012-06-13 19:29:53Z oparviai $

View File

@ -8,10 +8,10 @@
// Last changed : $Date: 2012-12-28 12:53:56 -0200 (sex, 28 dez 2012) $
// Last changed : $Date: 2015-05-18 15:25:07 +0000 (Mon, 18 May 2015) $
// File revision : $Revision: 3 $
// $Id: STTypes.h 162 2012-12-28 14:53:56Z oparviai $
// $Id: STTypes.h 215 2015-05-18 15:25:07Z oparviai $
@ -60,16 +60,6 @@ typedef unsigned long ulong;
#include "soundtouch_config.h"
#ifndef _WINDEF_
// if these aren't defined already by Windows headers, define now
typedef int BOOL;
#define FALSE 0
#define TRUE 1
#endif // _WINDEF_
namespace soundtouch
@ -78,7 +68,14 @@ namespace soundtouch
#if (defined(__SOFTFP__))
/// If following flag is defined, always uses multichannel processing
/// routines also for mono and stereo sound. This is for routine testing
/// purposes; output should be same with either routines, yet disabling
/// the dedicated mono/stereo processing routines will result in slower
/// runtime performance so recommendation is to keep this off.
#if (defined(__SOFTFP__) && defined(ANDROID))
// For Android compilation: Force use of Integer samples in case that
// compilation uses soft-floating point emulation - soft-fp is way too slow
@ -175,6 +172,7 @@ namespace soundtouch
// use c++ standard exceptions
#include <stdexcept>
#include <string>
#define ST_THROW_RT_ERROR(x) {throw std::runtime_error(x);}

View File

@ -41,10 +41,10 @@
// Last changed : $Date: 2012-12-28 17:32:59 -0200 (sex, 28 dez 2012) $
// Last changed : $Date: 2015-05-18 15:28:41 +0000 (Mon, 18 May 2015) $
// File revision : $Revision: 4 $
// $Id: SoundTouch.h 163 2012-12-28 19:32:59Z oparviai $
// $Id: SoundTouch.h 216 2015-05-18 15:28:41Z oparviai $
@ -79,10 +79,10 @@ namespace soundtouch
/// Soundtouch library version string
#define SOUNDTOUCH_VERSION "1.7.1"
#define SOUNDTOUCH_VERSION "1.9.0"
/// SoundTouch library version id
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
@ -160,7 +160,7 @@ private:
float virtualPitch;
/// Flag: Has sample rate been set?
BOOL bSrateSet;
bool bSrateSet;
/// Calculates effective rate & tempo values from 'virtualRate', 'virtualTempo' and
/// 'virtualPitch' parameters.
@ -247,8 +247,8 @@ public:
/// Changes a setting controlling the processing system behaviour. See the
/// 'SETTING_...' defines for available setting ID's.
/// \return 'TRUE' if the setting was succesfully changed
BOOL setSetting(int settingId, ///< Setting ID number. see SETTING_... defines.
/// \return 'true' if the setting was successfully changed
bool setSetting(int settingId, ///< Setting ID number. see SETTING_... defines.
int value ///< New setting value.

View File

@ -1,7 +0,0 @@

View File

@ -0,0 +1,997 @@
/// Classes for easy reading & writing of WAV sound files.
/// For big-endian CPU, define _BIG_ENDIAN_ during compile-time to correctly
/// parse the WAV files with such processors.
/// Admittedly, more complete WAV reader routines may exist in public domain,
/// but the reason for 'yet another' one is that those generic WAV reader
/// libraries are exhaustingly large and cumbersome! Wanted to have something
/// simpler here, i.e. something that's not already larger than rest of the
/// SoundTouch/SoundStretch program...
/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
// Last changed : $Date: 2014-10-05 16:20:24 +0000 (Sun, 05 Oct 2014) $
// File revision : $Revision: 4 $
// $Id: WavFile.cpp 200 2014-10-05 16:20:24Z oparviai $
// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <stdio.h>
#include <string>
#include <sstream>
#include <cstring>
#include <assert.h>
#include <limits.h>
#include "WavFile.h"
#include "STTypes.h"
using namespace std;
static const char riffStr[] = "RIFF";
static const char waveStr[] = "WAVE";
static const char fmtStr[] = "fmt ";
static const char factStr[] = "fact";
static const char dataStr[] = "data";
// Helper functions for swapping byte order to correctly read/write WAV files
// with big-endian CPU's: Define compile-time definition _BIG_ENDIAN_ to
// turn-on the conversion if it appears necessary.
// For example, Intel x86 is little-endian and doesn't require conversion,
// while PowerPC of Mac's and many other RISC cpu's are big-endian.
#ifdef __GNUC__
// In gcc compiler detect the byte order automatically. The define must stay
// conditional: turning it on for a little-endian target would byte-swap every
// header field and sample, although the WAV data is already in native order.
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    // big-endian platform.
    #define _BIG_ENDIAN_
#endif
#endif
#ifdef _BIG_ENDIAN_
// big-endian CPU, swap bytes in 16 & 32 bit words
// helper-function to swap byte-order of 32bit integer
/// Reverses the byte order of the 32-bit integer 'dwData' in place.
///
/// The bytes are shuffled on an unsigned copy so that no signed value is
/// ever shifted into or past the sign bit.
///
/// \return The byte-swapped value (same as the new content of 'dwData').
static inline int _swap32(int &dwData)
{
    const unsigned int v = (unsigned int)dwData;

    dwData = (int)(((v >> 24) & 0x000000FFu) |
                   ((v >>  8) & 0x0000FF00u) |
                   ((v <<  8) & 0x00FF0000u) |
                   ((v << 24) & 0xFF000000u));
    return dwData;
}
// helper-function to swap byte-order of 16bit integer
/// Exchanges the two bytes of the 16-bit integer 'wData' in place.
///
/// \return The byte-swapped value (same as the new content of 'wData').
static inline short _swap16(short &wData)
{
    const int lowToHigh = (wData << 8) & 0xFF00;
    const int highToLow = (wData >> 8) & 0x00FF;

    wData = (short)(highToLow | lowToHigh);
    return wData;
}
// helper-function to swap byte-order of buffer of 16bit integers
static inline void _swap16Buffer(short *pData, int numWords)
int i;
for (i = 0; i < numWords; i ++)
pData[i] = _swap16(pData[i]);
#else // BIG_ENDIAN
// little-endian CPU, WAV file is ok as such
// Pass-through variants of the byte-swap helpers. They keep the call sites
// identical on both kinds of CPU while leaving the data untouched.

/// Pass-through stand-in for the 32-bit swap: returns 'dwData' unchanged.
static inline int _swap32(int &dwData)
{
    return dwData;
}

/// Pass-through stand-in for the 16-bit swap: returns 'wData' unchanged.
static inline short _swap16(short &wData)
{
    return wData;
}

/// Pass-through stand-in for the buffer swap: the buffer is left as it is.
static inline void _swap16Buffer(short *pData, int numBytes)
{
    (void)pData;
    (void)numBytes;
}
#endif // BIG_ENDIAN
// Class WavFileBase
convBuff = NULL;
convBuffSize = 0;
delete[] convBuff;
convBuffSize = 0;
/// Get pointer to conversion buffer of at min. given size
void *WavFileBase::getConvBuffer(int sizeBytes)
if (convBuffSize < sizeBytes)
delete[] convBuff;
convBuffSize = (sizeBytes + 15) & -8; // round up to following 8-byte bounday
convBuff = new char[convBuffSize];
return convBuff;
// Class WavInFile
/// Constructor: opens the named file for reading and parses its WAV headers.
///
/// Throws a 'runtime_error' exception if the file can't be opened. Header
/// parsing is delegated to init().
WavInFile::WavInFile(const char *fileName)
{
    // Try to open the file for reading
    fptr = fopen(fileName, "rb");
    if (fptr == NULL)
    {
        // didn't succeed
        string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for reading.";
        ST_THROW_RT_ERROR(msg.c_str());
    }

    init();
}
/// Constructor: reads WAV data from an already opened stream.
///
/// Throws a 'runtime_error' exception if 'file' is NULL. Header parsing is
/// delegated to init().
WavInFile::WavInFile(FILE *file)
{
    // Use the stream handed in by the caller
    fptr = file;
    if (!file)
    {
        // didn't succeed
        string msg = "Error : Unable to access input stream for reading";
        ST_THROW_RT_ERROR(msg.c_str());
    }

    init();
}
/// Init the WAV file stream
void WavInFile::init()
int hdrsOk;
// assume file stream is already open
// Read the file headers
hdrsOk = readWavHeaders();
if (hdrsOk != 0)
// Something didn't match in the wav file headers
string msg = "Input file is corrupt or not a WAV file";
/* Ignore 'fixed' field value as 32bit signed linear data can have other value than 1.
if (header.format.fixed != 1)
string msg = "Input file uses unsupported encoding.";
dataRead = 0;
if (fptr) fclose(fptr);
fptr = NULL;
void WavInFile::rewind()
int hdrsOk;
fseek(fptr, 0, SEEK_SET);
hdrsOk = readWavHeaders();
assert(hdrsOk == 0);
dataRead = 0;
int WavInFile::checkCharTags() const
// header.format.fmt should equal to 'fmt '
if (memcmp(fmtStr, header.format.fmt, 4) != 0) return -1;
// header.data.data_field should equal to 'data'
if (memcmp(dataStr, header.data.data_field, 4) != 0) return -1;
return 0;
int WavInFile::read(unsigned char *buffer, int maxElems)
int numBytes;
uint afterDataRead;
// ensure it's 8 bit format
if (header.format.bits_per_sample != 8)
ST_THROW_RT_ERROR("Error: WavInFile::read(char*, int) works only with 8bit samples.");
assert(sizeof(char) == 1);
numBytes = maxElems;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
// Don't read more samples than are marked available in header
numBytes = (int)header.data.data_len - (int)dataRead;
assert(numBytes >= 0);
numBytes = (int)fread(buffer, 1, numBytes, fptr);
dataRead += numBytes;
return numBytes;
int WavInFile::read(short *buffer, int maxElems)
unsigned int afterDataRead;
int numBytes;
int numElems;
switch (header.format.bits_per_sample)
case 8:
// 8 bit format
unsigned char *temp = (unsigned char*)getConvBuffer(maxElems);
int i;
numElems = read(temp, maxElems);
// convert from 8 to 16 bit
for (i = 0; i < numElems; i ++)
buffer[i] = (short)(((short)temp[i] - 128) * 256);
case 16:
// 16 bit format
assert(sizeof(short) == 2);
numBytes = maxElems * 2;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
// Don't read more samples than are marked available in header
numBytes = (int)header.data.data_len - (int)dataRead;
assert(numBytes >= 0);
numBytes = (int)fread(buffer, 1, numBytes, fptr);
dataRead += numBytes;
numElems = numBytes / 2;
// 16bit samples, swap byte order if necessary
_swap16Buffer((short *)buffer, numElems);
stringstream ss;
ss << "\nOnly 8/16 bit sample WAV files supported in integer compilation. Can't open WAV file with ";
ss << (int)header.format.bits_per_sample;
ss << " bit sample format. ";
return numElems;
/// Read data in float format. Notice that when reading in float format
/// 8/16/24/32 bit sample formats are supported
int WavInFile::read(float *buffer, int maxElems)
unsigned int afterDataRead;
int numBytes;
int numElems;
int bytesPerSample;
bytesPerSample = header.format.bits_per_sample / 8;
if ((bytesPerSample < 1) || (bytesPerSample > 4))
stringstream ss;
ss << "\nOnly 8/16/24/32 bit sample WAV files supported. Can't open WAV file with ";
ss << (int)header.format.bits_per_sample;
ss << " bit sample format. ";
numBytes = maxElems * bytesPerSample;
afterDataRead = dataRead + numBytes;
if (afterDataRead > header.data.data_len)
// Don't read more samples than are marked available in header
numBytes = (int)header.data.data_len - (int)dataRead;
assert(numBytes >= 0);
// read raw data into temporary buffer
char *temp = (char*)getConvBuffer(numBytes);
numBytes = (int)fread(temp, 1, numBytes, fptr);
dataRead += numBytes;
numElems = numBytes / bytesPerSample;
// swap byte ordert & convert to float, depending on sample format
switch (bytesPerSample)
case 1:
unsigned char *temp2 = (unsigned char*)temp;
double conv = 1.0 / 128.0;
for (int i = 0; i < numElems; i ++)
buffer[i] = (float)(temp2[i] * conv - 1.0);
case 2:
short *temp2 = (short*)temp;
double conv = 1.0 / 32768.0;
for (int i = 0; i < numElems; i ++)
short value = temp2[i];
buffer[i] = (float)(_swap16(value) * conv);
case 3:
char *temp2 = (char *)temp;
double conv = 1.0 / 8388608.0;
for (int i = 0; i < numElems; i ++)
int value = *((int*)temp2);
value = _swap32(value) & 0x00ffffff; // take 24 bits
value |= (value & 0x00800000) ? 0xff000000 : 0; // extend minus sign bits
buffer[i] = (float)(value * conv);
temp2 += 3;
case 4:
int *temp2 = (int *)temp;
double conv = 1.0 / 2147483648.0;
assert(sizeof(int) == 4);
for (int i = 0; i < numElems; i ++)
int value = temp2[i];
buffer[i] = (float)(_swap32(value) * conv);
return numElems;
int WavInFile::eof() const
// return true if all data has been read or file eof has reached
return (dataRead == header.data.data_len || feof(fptr));
/// Checks that a character code lies between a white space ' ' and little
/// 'z', both inclusive.
///
/// \return 1 if 'c' is inside that range, otherwise 0.
static int isAlpha(char c)
{
    if (c < ' ') return 0;
    if (c > 'z') return 0;
    return 1;
}
// test if all characters are between a white space ' ' and little 'z'
static int isAlphaStr(const char *str)
char c;
c = str[0];
while (c)
if (isAlpha(c) == 0) return 0;
str ++;
c = str[0];
return 1;
int WavInFile::readRIFFBlock()
if (fread(&(header.riff), sizeof(WavRiff), 1, fptr) != 1) return -1;
// swap 32bit data byte order if necessary
_swap32((int &)header.riff.package_len);
// header.riff.riff_char should equal to 'RIFF');
if (memcmp(riffStr, header.riff.riff_char, 4) != 0) return -1;
// header.riff.wave should equal to 'WAVE'
if (memcmp(waveStr, header.riff.wave, 4) != 0) return -1;
return 0;
int WavInFile::readHeaderBlock()
char label[5];
string sLabel;
// lead label string
if (fread(label, 1, 4, fptr) !=4) return -1;
label[4] = 0;
if (isAlphaStr(label) == 0) return -1; // not a valid label
// Decode blocks according to their label
if (strcmp(label, fmtStr) == 0)
int nLen, nDump;
// 'fmt ' block
memcpy(header.format.fmt, fmtStr, 4);
// read length of the format field
if (fread(&nLen, sizeof(int), 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32(nLen); // int format_len;
header.format.format_len = nLen;
// calculate how much length differs from expected
nDump = nLen - ((int)sizeof(header.format) - 8);
// if format_len is larger than expected, read only as much data as we've space for
if (nDump > 0)
nLen = sizeof(header.format) - 8;
// read data
if (fread(&(header.format.fixed), nLen, 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap16(header.format.fixed); // short int fixed;
_swap16(header.format.channel_number); // short int channel_number;
_swap32((int &)header.format.sample_rate); // int sample_rate;
_swap32((int &)header.format.byte_rate); // int byte_rate;
_swap16(header.format.byte_per_sample); // short int byte_per_sample;
_swap16(header.format.bits_per_sample); // short int bits_per_sample;
// if format_len is larger than expected, skip the extra data
if (nDump > 0)
fseek(fptr, nDump, SEEK_CUR);
return 0;
else if (strcmp(label, factStr) == 0)
int nLen, nDump;
// 'fact' block
memcpy(header.fact.fact_field, factStr, 4);
// read length of the fact field
if (fread(&nLen, sizeof(int), 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32(nLen); // int fact_len;
header.fact.fact_len = nLen;
// calculate how much length differs from expected
nDump = nLen - ((int)sizeof(header.fact) - 8);
// if format_len is larger than expected, read only as much data as we've space for
if (nDump > 0)
nLen = sizeof(header.fact) - 8;
// read data
if (fread(&(header.fact.fact_sample_len), nLen, 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32((int &)header.fact.fact_sample_len); // int sample_length;
// if fact_len is larger than expected, skip the extra data
if (nDump > 0)
fseek(fptr, nDump, SEEK_CUR);
return 0;
else if (strcmp(label, dataStr) == 0)
// 'data' block
memcpy(header.data.data_field, dataStr, 4);
if (fread(&(header.data.data_len), sizeof(uint), 1, fptr) != 1) return -1;
// swap byte order if necessary
_swap32((int &)header.data.data_len);
return 1;
uint len, i;
uint temp;
// unknown block
// read length
if (fread(&len, sizeof(len), 1, fptr) != 1) return -1;
// scan through the block
for (i = 0; i < len; i ++)
if (fread(&temp, 1, 1, fptr) != 1) return -1;
if (feof(fptr)) return -1; // unexpected eof
return 0;
int WavInFile::readWavHeaders()
int res;
memset(&header, 0, sizeof(header));
res = readRIFFBlock();
if (res) return 1;
// read header blocks until data block is found
// read header blocks
res = readHeaderBlock();
if (res < 0) return 1; // error in file structure
} while (res == 0);
// check that all required tags are legal
return checkCharTags();
/// \return Channel count taken from the format header.
uint WavInFile::getNumChannels() const
{
    return header.format.channel_number;
}


/// \return Bits per sample taken from the format header.
uint WavInFile::getNumBits() const
{
    return header.format.bits_per_sample;
}


/// \return Bytes occupied by one sample of all channels together.
uint WavInFile::getBytesPerSample() const
{
    const uint channels = getNumChannels();
    const uint bits = getNumBits();
    return channels * bits / 8;
}


/// \return Sample rate taken from the format header.
uint WavInFile::getSampleRate() const
{
    return header.format.sample_rate;
}


/// \return Length of the sample data in bytes, as stated by the data header.
uint WavInFile::getDataSizeInBytes() const
{
    return header.data.data_len;
}
/// \return Number of samples in the file: 0 if the header's bytes-per-sample
///         field is zero, the 'fact' chunk's sample count if the 'fixed'
///         field is above 1, otherwise data length divided by bytes per
///         sample.
uint WavInFile::getNumSamples() const
{
    const unsigned short bytesPerFrame = (unsigned short)header.format.byte_per_sample;

    if (header.format.byte_per_sample == 0)
    {
        return 0;
    }
    if (header.format.fixed > 1)
    {
        return header.fact.fact_sample_len;
    }
    return header.data.data_len / bytesPerFrame;
}
/// Get the audio file length in milliseconds, rounded to the nearest value.
///
/// \return Length in milliseconds, or 0 if the header states a zero sample
///         rate (the duration can't be computed then).
uint WavInFile::getLengthMS() const
{
    double numSamples;
    double sampleRate;

    numSamples = (double)getNumSamples();
    sampleRate = (double)getSampleRate();

    // a zero rate would give an infinite quotient, and converting that to an
    // integer is undefined
    if (sampleRate == 0) return 0;

    return (uint)(1000.0 * numSamples / sampleRate + 0.5);
}
/// Returns how many milliseconds of audio have so far been read from the file.
///
/// \return Elapsed duration in milliseconds, or 0 if the header states a
///         zero byte rate (the duration can't be computed then).
uint WavInFile::getElapsedMS() const
{
    // guard the division: the byte rate comes straight from the file
    if (header.format.byte_rate == 0) return 0;

    return (uint)(1000.0 * (double)dataRead / (double)header.format.byte_rate);
}
// Class WavOutFile
/// Constructor: creates the named WAV file and writes a preliminary header.
///
/// Throws a 'runtime_error' exception if the file can't be opened for
/// writing.
///
/// \param fileName   Name of the file to create.
/// \param sampleRate Sample rate (e.g. 44100).
/// \param bits       Bits per sample.
/// \param channels   Number of channels.
WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int channels)
{
    bytesWritten = 0;
    fptr = fopen(fileName, "wb");
    if (fptr == NULL)
    {
        string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for writing.";
        ST_THROW_RT_ERROR(msg.c_str());
    }

    fillInHeader(sampleRate, bits, channels);
    // reserve the header space at the start of the file; sample data written
    // afterwards must land behind it
    writeHeader();
}
/// Constructor: writes WAV data to an already opened stream and emits a
/// preliminary header.
///
/// Throws a 'runtime_error' exception if 'file' is NULL.
///
/// \param file       Stream to write to.
/// \param sampleRate Sample rate (e.g. 44100).
/// \param bits       Bits per sample.
/// \param channels   Number of channels.
WavOutFile::WavOutFile(FILE *file, int sampleRate, int bits, int channels)
{
    bytesWritten = 0;
    fptr = file;
    if (fptr == NULL)
    {
        string msg = "Error : Unable to access output file stream.";
        ST_THROW_RT_ERROR(msg.c_str());
    }

    fillInHeader(sampleRate, bits, channels);
    // reserve the header space at the start of the stream; sample data
    // written afterwards must land behind it
    writeHeader();
}
if (fptr) fclose(fptr);
fptr = NULL;
/// Fills the in-memory WAV header for the given audio format. The length
/// fields are set to zero here because they are not known yet.
///
/// \param sampleRate Sample rate (e.g. 44100).
/// \param bits       Bits per sample.
/// \param channels   Number of channels.
void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
{
    // fill in the 'riff' part..

    // copy string 'RIFF' to riff_char
    memcpy(&(header.riff.riff_char), riffStr, 4);
    // package_len unknown so far
    header.riff.package_len = 0;
    // copy string 'WAVE' to wave
    memcpy(&(header.riff.wave), waveStr, 4);

    // fill in the 'format' part..

    // copy string 'fmt ' to fmt
    memcpy(&(header.format.fmt), fmtStr, 4);

    header.format.format_len = 0x10;
    header.format.fixed = 1;
    header.format.channel_number = (short)channels;
    header.format.sample_rate = (int)sampleRate;
    header.format.bits_per_sample = (short)bits;
    header.format.byte_per_sample = (short)(bits * channels / 8);
    header.format.byte_rate = header.format.byte_per_sample * (int)sampleRate;

    // fill in the 'fact' part...
    memcpy(&(header.fact.fact_field), factStr, 4);
    header.fact.fact_len = 4;
    header.fact.fact_sample_len = 0;

    // fill in the 'data' part..

    // copy string 'data' to data_field
    memcpy(&(header.data.data_field), dataStr, 4);
    // data_len unknown so far
    header.data.data_len = 0;
}
void WavOutFile::finishHeader()
// supplement the file length into the header structure
header.riff.package_len = bytesWritten + sizeof(WavHeader) - sizeof(WavRiff) + 4;
header.data.data_len = bytesWritten;
header.fact.fact_sample_len = bytesWritten / header.format.byte_per_sample;
void WavOutFile::writeHeader()
WavHeader hdrTemp;
int res;
// swap byte order if necessary
hdrTemp = header;
_swap32((int &)hdrTemp.riff.package_len);
_swap32((int &)hdrTemp.format.format_len);
_swap16((short &)hdrTemp.format.fixed);
_swap16((short &)hdrTemp.format.channel_number);
_swap32((int &)hdrTemp.format.sample_rate);
_swap32((int &)hdrTemp.format.byte_rate);
_swap16((short &)hdrTemp.format.byte_per_sample);
_swap16((short &)hdrTemp.format.bits_per_sample);
_swap32((int &)hdrTemp.data.data_len);
_swap32((int &)hdrTemp.fact.fact_len);
_swap32((int &)hdrTemp.fact.fact_sample_len);
// write the supplemented header in the beginning of the file
fseek(fptr, 0, SEEK_SET);
res = (int)fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);
if (res != 1)
ST_THROW_RT_ERROR("Error while writing to a wav file.");
// jump back to the end of the file
fseek(fptr, 0, SEEK_END);
void WavOutFile::write(const unsigned char *buffer, int numElems)
int res;
if (header.format.bits_per_sample != 8)
ST_THROW_RT_ERROR("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
assert(sizeof(char) == 1);
res = (int)fwrite(buffer, 1, numElems, fptr);
if (res != numElems)
ST_THROW_RT_ERROR("Error while writing to a wav file.");
bytesWritten += numElems;
void WavOutFile::write(const short *buffer, int numElems)
int res;
// 16 bit samples
if (numElems < 1) return; // nothing to do
switch (header.format.bits_per_sample)
case 8:
int i;
unsigned char *temp = (unsigned char *)getConvBuffer(numElems);
// convert from 16bit format to 8bit format
for (i = 0; i < numElems; i ++)
temp[i] = (unsigned char)(buffer[i] / 256 + 128);
// write in 8bit format
write(temp, numElems);
case 16:
// 16bit format
// use temp buffer to swap byte order if necessary
short *pTemp = (short *)getConvBuffer(numElems * sizeof(short));
memcpy(pTemp, buffer, numElems * 2);
_swap16Buffer(pTemp, numElems);
res = (int)fwrite(pTemp, 2, numElems, fptr);
if (res != numElems)
ST_THROW_RT_ERROR("Error while writing to a wav file.");
bytesWritten += 2 * numElems;
stringstream ss;
ss << "\nOnly 8/16 bit sample WAV files supported in integer compilation. Can't open WAV file with ";
ss << (int)header.format.bits_per_sample;
ss << " bit sample format. ";
/// Convert from float to integer and saturate.
///
/// 'fvalue' is first limited to [minval, maxval] and then truncated towards
/// zero. Limits at the ends of the 'int' range need extra care: a float
/// can't represent INT_MAX, so a limit written as 2147483647.0f is really
/// 2^31, and casting that to int is undefined. Such values are mapped to
/// INT_MAX / INT_MIN explicitly.
///
/// \param fvalue Value to convert.
/// \param minval Smallest allowed value.
/// \param maxval Largest allowed value.
/// \return The limited value as integer; 0 if 'fvalue' is not a number.
inline int saturate(float fvalue, float minval, float maxval)
{
    // NaN compares false against everything and has no integer equivalent
    if (fvalue != fvalue) return 0;

    if (fvalue > maxval)
    {
        fvalue = maxval;
    }
    else if (fvalue < minval)
    {
        fvalue = minval;
    }

    if (fvalue >= 2147483648.0f) return INT_MAX;
    if (fvalue <= -2147483648.0f) return INT_MIN;
    return (int)fvalue;
}
void WavOutFile::write(const float *buffer, int numElems)
int numBytes;
int bytesPerSample;
if (numElems == 0) return;
bytesPerSample = header.format.bits_per_sample / 8;
numBytes = numElems * bytesPerSample;
short *temp = (short*)getConvBuffer(numBytes);
switch (bytesPerSample)
case 1:
unsigned char *temp2 = (unsigned char *)temp;
for (int i = 0; i < numElems; i ++)
temp2[i] = (unsigned char)saturate(buffer[i] * 128.0f + 128.0f, 0.0f, 255.0f);
case 2:
short *temp2 = (short *)temp;
for (int i = 0; i < numElems; i ++)
short value = (short)saturate(buffer[i] * 32768.0f, -32768.0f, 32767.0f);
temp2[i] = _swap16(value);
case 3:
char *temp2 = (char *)temp;
for (int i = 0; i < numElems; i ++)
int value = saturate(buffer[i] * 8388608.0f, -8388608.0f, 8388607.0f);
*((int*)temp2) = _swap32(value);
temp2 += 3;
case 4:
int *temp2 = (int *)temp;
for (int i = 0; i < numElems; i ++)
int value = saturate(buffer[i] * 2147483648.0f, -2147483648.0f, 2147483647.0f);
temp2[i] = _swap32(value);
int res = (int)fwrite(temp, 1, numBytes, fptr);
if (res != numBytes)
ST_THROW_RT_ERROR("Error while writing to a wav file.");
bytesWritten += numBytes;

short bits_per_sample;
} WavFormat;
/// WAV audio file 'fact' section header
typedef struct
char fact_field[4];
int fact_len;
uint fact_sample_len;
} WavFact;
/// WAV audio file 'data' section header
typedef struct
@ -88,19 +96,40 @@ typedef struct
WavRiff riff;
WavFormat format;
WavFact fact;
WavData data;
} WavHeader;
/// Base class for processing WAV audio files.
class WavFileBase
/// Conversion working buffer;
char *convBuff;
int convBuffSize;
virtual ~WavFileBase();
/// Get pointer to conversion buffer of at min. given size
void *getConvBuffer(int sizeByte);
/// Class for reading WAV audio files.
class WavInFile
class WavInFile : protected WavFileBase
/// File pointer.
FILE *fptr;
/// Position within the audio stream
long position;
/// Counter of how many bytes of sample data have been read from the file.
uint dataRead;
long dataRead;
/// WAV header information
WavHeader header;
@ -158,12 +187,17 @@ public:
/// Get the audio file length in milliseconds
uint getLengthMS() const;
/// Returns how many milliseconds of audio have so far been read from the file
/// \return elapsed duration in milliseconds
uint getElapsedMS() const;
/// Reads audio samples from the WAV file. This routine works only for 8 bit samples.
/// Reads given number of elements from the file or if end-of-file reached, as many
/// elements as are left in the file.
/// \return Number of 8-bit integers read from the file.
int read(char *buffer, int maxElems);
int read(unsigned char *buffer, int maxElems);
/// Reads audio samples from the WAV file to 16 bit integer format. Reads given number
/// of elements from the file or if end-of-file reached, as many elements as are
@ -177,6 +211,7 @@ public:
/// Reads audio samples from the WAV file to floating point format, converting
/// sample values to range [-1,1[. Reads given number of elements from the file
/// or if end-of-file reached, as many elements as are left in the file.
/// Notice that reading in float format supports 8/16/24/32bit sample formats.
/// \return Number of elements read from the file.
int read(float *buffer, ///< Pointer to buffer where to read data.
@ -192,7 +227,7 @@ public:
/// Class for writing WAV audio files.
class WavOutFile
class WavOutFile : protected WavFileBase
/// Pointer to the WAV file
@ -230,7 +265,7 @@ public:
/// Write data to WAV file. This function works only with 8bit samples.
/// Throws a 'runtime_error' exception if writing to file fails.
void write(const char *buffer, ///< Pointer to sample data buffer.
void write(const unsigned char *buffer, ///< Pointer to sample data buffer.
int numElems ///< How many array items are to be written to file.

// Circle constant. The digits are those of pi to double precision.
#define PI       3.14159265358979323846
#define TWOPI    (2 * PI)
// define this to save AA filter coefficients to a file
#include <stdio.h>
/// Debugging aid: dumps the filter coefficients, one per line, into the text
/// file "aa_filter_coeffs.txt". Does nothing if the file can't be created.
///
/// \param coeffs Coefficient array.
/// \param len    Number of coefficients in 'coeffs'.
static void _DEBUG_SAVE_AAFIR_COEFFS(SAMPLETYPE *coeffs, int len)
{
    FILE *fptr = fopen("aa_filter_coeffs.txt", "wt");
    if (fptr == NULL) return;

    for (int i = 0; i < len; i ++)
    {
        double temp = coeffs[i];
        fprintf(fptr, "%lf\n", temp);
    }

    // release the handle; this also flushes the buffered output
    fclose(fptr);
}
* Implementation of the class 'AAFilter'
@ -99,7 +123,7 @@ void AAFilter::calculateCoeffs()
uint i;
double cntTemp, temp, tempCoeff,h, w;
double fc2, wc;
double wc;
double scaleCoeff, sum;
double *work;
@ -112,8 +136,7 @@ void AAFilter::calculateCoeffs()
work = new double[length];
coeffs = new SAMPLETYPE[length];
fc2 = 2.0 * cutoffFreq;
wc = PI * fc2;
wc = 2.0 * PI * cutoffFreq;
tempCoeff = TWOPI / (double)length;
sum = 0;
@ -124,7 +147,7 @@ void AAFilter::calculateCoeffs()
temp = cntTemp * wc;
if (temp != 0)
h = fc2 * sin(temp) / temp; // sinc function
h = sin(temp) / temp; // sinc function
@ -153,17 +176,21 @@ void AAFilter::calculateCoeffs()
for (i = 0; i < length; i ++)
// scale & round to nearest integer
temp = work[i] * scaleCoeff;
// scale & round to nearest integer
temp += (temp >= 0) ? 0.5 : -0.5;
// ensure no overflows
assert(temp >= -32768 && temp <= 32767);
coeffs[i] = (SAMPLETYPE)temp;
// Set coefficients. Use divide factor 14 => divide result by 2^14 = 16384
pFIR->setCoefficients(coeffs, length, 14);
_DEBUG_SAVE_AAFIR_COEFFS(coeffs, length);
delete[] work;
delete[] coeffs;
@ -178,6 +205,31 @@ uint AAFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples
/// Applies the filter to the given src & dest pipes, so that processed amount of
/// samples get removed from src, and produced amount added to dest.
/// Note : The amount of outputted samples is by value of 'filter length'
/// smaller than the amount of input samples.
///
/// \return Number of samples produced.
uint AAFilter::evaluate(FIFOSampleBuffer &dest, FIFOSampleBuffer &src) const
{
    SAMPLETYPE *pdest;
    const SAMPLETYPE *psrc;
    uint numSrcSamples;
    uint result;
    int numChannels = src.getChannels();

    assert(numChannels == dest.getChannels());

    numSrcSamples = src.numSamples();
    psrc = src.ptrBegin();
    pdest = dest.ptrEnd(numSrcSamples);
    result = pFIR->evaluate(pdest, psrc, numSrcSamples, numChannels);

    // commit the work to both pipes: drop the consumed input and publish the
    // samples that were written behind the end of 'dest'
    src.receiveSamples(result);
    dest.putSamples(result);

    return result;
}
/// \return Filter length as reported by the underlying FIR filter object.
uint AAFilter::getLength() const
{
    const uint firLength = pFIR->getLength();
    return firLength;
}

#define AAFilter_H
#include "STTypes.h"
#include "FIFOSampleBuffer.h"
namespace soundtouch
@ -84,6 +85,14 @@ public:
const SAMPLETYPE *src,
uint numSamples,
uint numChannels) const;
/// Applies the filter to the given src & dest pipes, so that processed amount of
/// samples get removed from src, and produced amount added to dest
/// Note : The amount of outputted samples is by value of 'filter length'
/// smaller than the amount of input samples.
uint evaluate(FIFOSampleBuffer &dest,
FIFOSampleBuffer &src) const;

@ -226,6 +226,7 @@ void BPMDetect::updateXCorr(int process_samples)
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
pBuffer = buffer->ptrBegin();
#pragma omp parallel for
for (offs = windowStart; offs < windowLen; offs ++)

View File

@ -15,7 +15,7 @@
// Last changed : $Date: 2012-11-08 16:53:01 -0200 (qui, 08 nov 2012) $
// Last changed : $Date: 2012-11-08 18:53:01 +0000 (Thu, 08 Nov 2012) $
// File revision : $Revision: 4 $
@ -72,8 +72,7 @@ FIRFilter::~FIRFilter()
// Usual C-version of the filter routine for stereo sound
uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
uint i, j, end;
int j, end;
// when using floating point samples, use a scaler instead of a divider
// because division is much slower operation than multiplying.
@ -87,9 +86,12 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
end = 2 * (numSamples - length);
#pragma omp parallel for
for (j = 0; j < end; j += 2)
const SAMPLETYPE *ptr;
uint i;
suml = sumr = 0;
ptr = src + j;
@ -130,28 +132,31 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
// Usual C-version of the filter routine for mono sound
uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
uint i, j, end;
int j, end;
// when using floating point samples, use a scaler instead of a divider
// because division is much slower operation than multiplying.
double dScaler = 1.0 / (double)resultDivider;
assert(length != 0);
end = numSamples - length;
#pragma omp parallel for
for (j = 0; j < end; j ++)
const SAMPLETYPE *pSrc = src + j;
uint i;
sum = 0;
for (i = 0; i < length; i += 4)
// loop is unrolled by factor of 4 here for efficiency
sum += src[i + 0] * filterCoeffs[i + 0] +
src[i + 1] * filterCoeffs[i + 1] +
src[i + 2] * filterCoeffs[i + 2] +
src[i + 3] * filterCoeffs[i + 3];
sum += pSrc[i + 0] * filterCoeffs[i + 0] +
pSrc[i + 1] * filterCoeffs[i + 1] +
pSrc[i + 2] * filterCoeffs[i + 2] +
pSrc[i + 3] * filterCoeffs[i + 3];
sum >>= resultDivFactor;
@ -161,12 +166,67 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
sum *= dScaler;
dest[j] = (SAMPLETYPE)sum;
src ++;
return end;
/// Usual C-version of the filter routine for sound with an arbitrary number
/// of interleaved channels (fewer than 16).
///
/// \param dest        Output buffer.
/// \param src         Interleaved input samples.
/// \param numSamples  Number of input samples per channel.
/// \param numChannels Number of interleaved channels.
/// \return Number of output samples per channel, i.e. 'numSamples' minus the
///         filter length.
uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
{
    int j, end;

#ifdef SOUNDTOUCH_FLOAT_SAMPLES
    // when using floating point samples, use a scaler instead of a divider
    // because division is much slower operation than multiplying.
    double dScaler = 1.0 / (double)resultDivider;
#endif

    assert(length != 0);
    assert(src != NULL);
    assert(dest != NULL);
    assert(filterCoeffs != NULL);
    assert(numChannels < 16);

    end = numChannels * (numSamples - length);

    #pragma omp parallel for
    for (j = 0; j < end; j += numChannels)
    {
        const SAMPLETYPE *ptr;
        // one accumulator per channel; local to the iteration so that
        // parallel iterations don't share it
        LONG_SAMPLETYPE sums[16];
        uint c, i;

        for (c = 0; c < numChannels; c ++)
        {
            sums[c] = 0;
        }

        ptr = src + j;

        for (i = 0; i < length; i ++)
        {
            SAMPLETYPE coef=filterCoeffs[i];
            for (c = 0; c < numChannels; c ++)
            {
                sums[c] += ptr[0] * coef;
                ptr ++;
            }
        }

        for (c = 0; c < numChannels; c ++)
        {
            // exactly one of the two scalings applies, depending on the
            // sample type the library is compiled for
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
            sums[c] >>= resultDivFactor;
#else
            sums[c] *= dScaler;
#endif // SOUNDTOUCH_INTEGER_SAMPLES
            dest[j+c] = (SAMPLETYPE)sums[c];
        }
    }
    return numSamples - length;
}
// Set filter coefficients and length.
// Throws an exception if filter length isn't divisible by 8
@ -199,18 +259,27 @@ uint FIRFilter::getLength() const
// Note : The amount of outputted samples is by value of 'filter_length'
// smaller than the amount of input samples.
uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
assert(numChannels == 1 || numChannels == 2);
assert(length > 0);
assert(lengthDiv8 * 8 == length);
if (numSamples < length) return 0;
if (numChannels == 2)
if (numChannels == 1)
return evaluateFilterMono(dest, src, numSamples);
else if (numChannels == 2)
return evaluateFilterStereo(dest, src, numSamples);
} else {
return evaluateFilterMono(dest, src, numSamples);
assert(numChannels > 0);
return evaluateFilterMulti(dest, src, numSamples, numChannels);

@ -71,6 +71,7 @@ protected:
virtual uint evaluateFilterMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples) const;
virtual uint evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels);
@ -90,7 +91,7 @@ public:
uint evaluate(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples,
uint numChannels) const;
uint numChannels);
uint getLength() const;

// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <stddef.h>
#include <math.h>
#include "InterpolateCubic.h"
#include "STTypes.h"
using namespace soundtouch;
// cubic interpolation coefficients
//
// Four rows of four values. The transpose routines in this file multiply the
// values of row k with x^3, x^2, x and 1 (x being the fractional position)
// and add them up to get the weight of the k-th of four consecutive source
// samples.
static const float _coeffs[]=
{ -0.5f, 1.0f, -0.5f, 0.0f,
1.5f, -2.5f, 0.0f, 1.0f,
-1.5f, 2.0f, 0.5f, 0.0f,
0.5f, -0.5f, 0.0f, 0.0f};
fract = 0;
void InterpolateCubic::resetRegisters()
fract = 0;
/// Transpose mono audio with cubic interpolation.
///
/// \param pdest      Output buffer.
/// \param psrc       Input samples.
/// \param srcSamples In: number of input samples available. Out: number of
///                   input samples consumed.
/// \return Number of produced output samples.
int InterpolateCubic::transposeMono(SAMPLETYPE *pdest,
                    const SAMPLETYPE *psrc,
                    int &srcSamples)
{
    // each output sample looks at four consecutive input samples
    const int lastStart = srcSamples - 4;
    int consumed = 0;
    int produced = 0;

    for (; consumed < lastStart; ++produced)
    {
        // powers of the fractional position
        const float one = 1.0f;
        const float t   = (float)fract;    // x
        const float t2  = t * t;           // x^2
        const float t3  = t2 * t;          // x^3

        assert(fract < 1.0);

        // weights of the four source samples
        const float w0 = _coeffs[0] * t3 + _coeffs[1] * t2 + _coeffs[2] * t + _coeffs[3] * one;
        const float w1 = _coeffs[4] * t3 + _coeffs[5] * t2 + _coeffs[6] * t + _coeffs[7] * one;
        const float w2 = _coeffs[8] * t3 + _coeffs[9] * t2 + _coeffs[10] * t + _coeffs[11] * one;
        const float w3 = _coeffs[12] * t3 + _coeffs[13] * t2 + _coeffs[14] * t + _coeffs[15] * one;

        const float mixed = w0 * psrc[0] + w1 * psrc[1] + w2 * psrc[2] + w3 * psrc[3];
        pdest[produced] = (SAMPLETYPE)mixed;

        // update position fraction
        fract += rate;
        // update whole positions
        const int step = (int)fract;
        fract -= step;
        psrc += step;
        consumed += step;
    }

    srcSamples = consumed;
    return produced;
}
/// Transpose stereo audio with cubic interpolation. Samples are interleaved
/// as left/right pairs in both buffers.
///
/// \param pdest      Output buffer.
/// \param psrc       Input samples.
/// \param srcSamples In: number of input sample pairs available. Out: number
///                   of input sample pairs consumed.
/// \return Number of produced output sample pairs.
int InterpolateCubic::transposeStereo(SAMPLETYPE *pdest,
                    const SAMPLETYPE *psrc,
                    int &srcSamples)
{
    // each output pair looks at four consecutive input pairs
    const int lastStart = srcSamples - 4;
    int consumed = 0;
    int produced = 0;

    for (; consumed < lastStart; ++produced)
    {
        // powers of the fractional position
        const float one = 1.0f;
        const float t   = (float)fract;    // x
        const float t2  = t * t;           // x^2
        const float t3  = t2 * t;          // x^3

        assert(fract < 1.0);

        // weights of the four source pairs, shared by both channels
        const float w0 = _coeffs[0] * t3 + _coeffs[1] * t2 + _coeffs[2] * t + _coeffs[3] * one;
        const float w1 = _coeffs[4] * t3 + _coeffs[5] * t2 + _coeffs[6] * t + _coeffs[7] * one;
        const float w2 = _coeffs[8] * t3 + _coeffs[9] * t2 + _coeffs[10] * t + _coeffs[11] * one;
        const float w3 = _coeffs[12] * t3 + _coeffs[13] * t2 + _coeffs[14] * t + _coeffs[15] * one;

        const float left  = w0 * psrc[0] + w1 * psrc[2] + w2 * psrc[4] + w3 * psrc[6];
        const float right = w0 * psrc[1] + w1 * psrc[3] + w2 * psrc[5] + w3 * psrc[7];

        pdest[2*produced]   = (SAMPLETYPE)left;
        pdest[2*produced+1] = (SAMPLETYPE)right;

        // update position fraction
        fract += rate;
        // update whole positions
        const int step = (int)fract;
        fract -= step;
        psrc += 2*step;
        consumed += step;
    }

    srcSamples = consumed;
    return produced;
}
/// Transpose multi-channel audio with cubic interpolation. Frames hold
/// 'numChannels' interleaved samples.
///
/// \param pdest       output buffer; written sequentially, one sample per channel
/// \param psrc        interleaved source frames
/// \param srcSamples  in: number of source frames available;
///                    out: number of source frames consumed
/// \return value of the output counter 'i'
int InterpolateCubic::transposeMulti(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
int i;
// each output reads four consecutive frames, so stop four frames early
int srcSampleEnd = srcSamples - 4;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
// powers of the fractional position, paired with the columns of _coeffs
const float x3 = 1.0f;
const float x2 = (float)fract; // x
const float x1 = x2*x2; // x^2
const float x0 = x1*x2; // x^3
float y0, y1, y2, y3;
assert(fract < 1.0);
// y0..y3: interpolation weights, shared by all channels
y0 = _coeffs[0] * x0 + _coeffs[1] * x1 + _coeffs[2] * x2 + _coeffs[3] * x3;
y1 = _coeffs[4] * x0 + _coeffs[5] * x1 + _coeffs[6] * x2 + _coeffs[7] * x3;
y2 = _coeffs[8] * x0 + _coeffs[9] * x1 + _coeffs[10] * x2 + _coeffs[11] * x3;
y3 = _coeffs[12] * x0 + _coeffs[13] * x1 + _coeffs[14] * x2 + _coeffs[15] * x3;
for (int c = 0; c < numChannels; c ++)
float out;
// sample of channel c in frame k lives at psrc[c + k * numChannels]
out = y0 * psrc[c] + y1 * psrc[c + numChannels] + y2 * psrc[c + 2 * numChannels] + y3 * psrc[c + 3 * numChannels];
pdest[0] = (SAMPLETYPE)out;
pdest ++;
// NOTE(review): the extent of the channel loop is not visible here; confirm
// that 'i' is incremented once per frame rather than once per channel
i ++;
// update position fraction
fract += rate;
// update whole positions: advance by whole frames (numChannels samples each)
int whole = (int)fract;
fract -= whole;
psrc += numChannels*whole;
srcCount += whole;
// report the number of consumed source frames back to the caller
srcSamples = srcCount;
return i;

// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef _InterpolateCubic_H_
#define _InterpolateCubic_H_
#include "RateTransposer.h"
#include "STTypes.h"
namespace soundtouch
/// Sample rate transposer that interpolates between source frames with a
/// cubic polynomial. Implements the TransposerBase interpolation hooks.
class InterpolateCubic : public TransposerBase
/// Resets the fractional source position.
virtual void resetRegisters();
/// Transposes mono audio; updates 'srcSamples' to the number of consumed
/// source samples and returns the number of produced output samples.
virtual int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved stereo audio; same contract as transposeMono,
/// counted in frames.
virtual int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved multi-channel audio; same contract as
/// transposeMono, counted in frames.
virtual int transposeMulti(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Fractional read position between two source frames; carried over between
/// calls.
float fract;

// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <assert.h>
#include <stdlib.h>
#include "InterpolateLinear.h"
using namespace soundtouch;
// InterpolateLinearInteger - integer arithmetic implementation
/// fixed-point interpolation routine precision: 'iFract' and 'iRate' are
/// integers scaled by this factor, so the value SCALE represents 1.0
#define SCALE 65536
// Constructor. Delegates to the TransposerBase default constructor.
// NOTE(review): the constructor body is not visible here; the remark below
// suggests it calls a member function non-virtually — confirm.
InterpolateLinearInteger::InterpolateLinearInteger() : TransposerBase()
// Notice: use local function calling syntax for sake of clarity,
// to indicate the fact that C++ constructor can't call virtual functions.
// Resets the interpolation state: the fixed-point fractional source position
// restarts from zero.
void InterpolateLinearInteger::resetRegisters()
iFract = 0;
// Transposes the sample rate of the given samples using linear interpolation
// in fixed-point arithmetic. 'Mono' version of the routine.
//
// dest        output buffer
// src         source samples
// srcSamples  in: number of source samples available;
//             out: number of source samples consumed
// Returns the value of the output counter 'i'.
//
// NOTE(review): neither a declaration of 'temp' nor an increment of 'i' is
// visible here. If they are truly absent, every result is stored to dest[0]
// and the function returns 0 — confirm against the full file.
int InterpolateLinearInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
int i;
// each output reads src[0] and src[1], so stop one sample before the end
int srcSampleEnd = srcSamples - 1;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
assert(iFract < SCALE);
// weighted sum of the two neighbouring samples; weights add up to SCALE
temp = (SCALE - iFract) * src[0] + iFract * src[1];
dest[i] = (SAMPLETYPE)(temp / SCALE);
// advance the fixed-point position, then split off the whole samples
iFract += iRate;
int iWhole = iFract / SCALE;
iFract -= iWhole * SCALE;
srcCount += iWhole;
src += iWhole;
// report the number of consumed source samples back to the caller
srcSamples = srcCount;
return i;
// Transposes the sample rate of the given samples using linear interpolation
// in fixed-point arithmetic. 'Stereo' version of the routine; source and
// destination hold two interleaved channels.
//
// dest        output buffer; two samples are written per produced frame
// src         interleaved source frames
// srcSamples  in: number of source frames available;
//             out: number of source frames consumed
// Returns the value of the output counter 'i'.
//
// NOTE(review): neither declarations of 'temp0'/'temp1' nor an increment of
// 'i' are visible here. If 'i' is truly never incremented, the function
// returns 0 — confirm against the full file.
int InterpolateLinearInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
int i;
// each output reads two consecutive frames, so stop one frame before the end
int srcSampleEnd = srcSamples - 1;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
assert(iFract < SCALE);
// same weights for both channels; weights add up to SCALE
temp0 = (SCALE - iFract) * src[0] + iFract * src[2];
temp1 = (SCALE - iFract) * src[1] + iFract * src[3];
dest[0] = (SAMPLETYPE)(temp0 / SCALE);
dest[1] = (SAMPLETYPE)(temp1 / SCALE);
dest += 2;
// advance the fixed-point position, then split off the whole frames
iFract += iRate;
int iWhole = iFract / SCALE;
iFract -= iWhole * SCALE;
srcCount += iWhole;
src += 2*iWhole;
// report the number of consumed source frames back to the caller
srcSamples = srcCount;
return i;
// Transposes the sample rate of the given samples using linear interpolation
// in fixed-point arithmetic. Multi-channel version of the routine; frames
// hold 'numChannels' interleaved samples.
//
// dest        output buffer; written sequentially, one sample per channel
// src         interleaved source frames
// srcSamples  in: number of source frames available;
//             out: number of source frames consumed
// Returns the value of the output counter 'i'.
//
// NOTE(review): neither declarations of 'temp'/'vol1' nor an increment of
// 'i' are visible here. If 'i' is truly never incremented, the function
// returns 0 — confirm against the full file.
int InterpolateLinearInteger::transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
int i;
// each output reads two consecutive frames, so stop one frame before the end
int srcSampleEnd = srcSamples - 1;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
assert(iFract < SCALE);
// weight of the earlier frame; the later frame is weighted by iFract
vol1 = (SCALE - iFract);
for (int c = 0; c < numChannels; c ++)
temp = vol1 * src[c] + iFract * src[c + numChannels];
dest[0] = (SAMPLETYPE)(temp / SCALE);
dest ++;
// advance the fixed-point position, then split off the whole frames
iFract += iRate;
int iWhole = iFract / SCALE;
iFract -= iWhole * SCALE;
srcCount += iWhole;
src += iWhole * numChannels;
// report the number of consumed source frames back to the caller
srcSamples = srcCount;
return i;
// Sets new target rate. Normal rate = 1.0, smaller values represent slower
// rate, larger faster rates. The rate is stored in fixed-point form:
// multiplied by SCALE and rounded to the nearest integer.
void InterpolateLinearInteger::setRate(float newRate)
iRate = (int)(newRate * SCALE + 0.5f);
// InterpolateLinearFloat - floating point arithmetic implementation
// Constructor. Delegates to the TransposerBase default constructor.
// NOTE(review): the constructor body is not visible here; the remark below
// suggests it calls a member function non-virtually — confirm.
InterpolateLinearFloat::InterpolateLinearFloat() : TransposerBase()
// Notice: use local function calling syntax for sake of clarity,
// to indicate the fact that C++ constructor can't call virtual functions.
// Resets the interpolation state: the fractional source position restarts
// from zero.
void InterpolateLinearFloat::resetRegisters()
fract = 0;
// Transposes the sample rate of the given samples using linear interpolation
// in floating point arithmetic. 'Mono' version of the routine.
//
// dest        output buffer; one sample is written per produced frame
// src         source samples
// srcSamples  in: number of source samples available;
//             out: number of source samples consumed
// Returns the number of samples written to the "dest" buffer.
int InterpolateLinearFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
int i;
// each output reads src[0] and src[1], so stop one sample before the end
int srcSampleEnd = srcSamples - 1;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
double out;
assert(fract < 1.0);
// blend the two neighbouring samples; the weights add up to 1
out = (1.0 - fract) * src[0] + fract * src[1];
dest[i] = (SAMPLETYPE)out;
i ++;
// update position fraction
fract += rate;
// update whole positions: advance the source by the integer part and keep
// only the fractional remainder in 'fract'
int whole = (int)fract;
fract -= whole;
src += whole;
srcCount += whole;
// report the number of consumed source samples back to the caller
srcSamples = srcCount;
return i;
// Transposes the sample rate of the given samples using linear interpolation
// in floating point arithmetic. 'Stereo' version of the routine; source and
// destination hold two interleaved channels.
//
// dest        output buffer; two samples are written per produced frame
// src         interleaved source frames
// srcSamples  in: number of source frames available;
//             out: number of source frames consumed
// Returns the number of frames written to the "dest" buffer.
int InterpolateLinearFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
int i;
// each output reads two consecutive frames, so stop one frame before the end
int srcSampleEnd = srcSamples - 1;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
double out0, out1;
assert(fract < 1.0);
// same weights for both channels; the weights add up to 1
out0 = (1.0 - fract) * src[0] + fract * src[2];
out1 = (1.0 - fract) * src[1] + fract * src[3];
dest[2*i] = (SAMPLETYPE)out0;
dest[2*i+1] = (SAMPLETYPE)out1;
i ++;
// update position fraction
fract += rate;
// update whole positions: advance by whole frames (two samples each)
int whole = (int)fract;
fract -= whole;
src += 2*whole;
srcCount += whole;
// report the number of consumed source frames back to the caller
srcSamples = srcCount;
return i;
// Transposes the sample rate of the given samples using linear interpolation
// in floating point arithmetic. Multi-channel version of the routine; frames
// hold 'numChannels' interleaved samples.
//
// dest        output buffer; 'numChannels' samples are written per produced frame
// src         interleaved source frames
// srcSamples  in: number of source frames available;
//             out: number of source frames consumed
// Returns the number of frames written to the "dest" buffer.
int InterpolateLinearFloat::transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples)
{
    int i = 0;
    // each output reads two consecutive frames, so stop one frame before the end
    int srcSampleEnd = srcSamples - 1;
    int srcCount = 0;

    while (srcCount < srcSampleEnd)
    {
        float temp, vol1;

        // weight of the earlier frame; the later frame is weighted by 'fract'
        vol1 = (1.0f- fract);
        for (int c = 0; c < numChannels; c ++)
        {
            temp = vol1 * src[c] + fract * src[c + numChannels];
            *dest = (SAMPLETYPE)temp;
            dest ++;
        }
        // count the produced frame, as the mono and stereo routines do;
        // without this the function would always report zero output frames
        i ++;

        // update position fraction, then split off the whole frames
        fract += rate;
        int iWhole = (int)fract;
        fract -= iWhole;
        srcCount += iWhole;
        src += iWhole * numChannels;
    }
    // report the number of consumed source frames back to the caller
    srcSamples = srcCount;

    return i;
}

// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef _InterpolateLinear_H_
#define _InterpolateLinear_H_
#include "RateTransposer.h"
#include "STTypes.h"
namespace soundtouch
/// Linear transposer class that uses integer arithmetics
class InterpolateLinearInteger : public TransposerBase
/// Fractional source position in fixed-point form (scaled by SCALE in the
/// implementation file).
int iFract;
/// Transposing rate in fixed-point form; set by setRate().
int iRate;
/// Resets the fractional source position.
virtual void resetRegisters();
/// Transposes mono audio; 'srcSamples' is updated to the number of consumed
/// source samples.
virtual int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved stereo audio; 'srcSamples' is updated to the
/// number of consumed source frames.
virtual int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved multi-channel audio; 'srcSamples' is updated to
/// the number of consumed source frames.
virtual int transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples);
/// Sets new target rate. Normal rate = 1.0, smaller values represent slower
/// rate, larger faster rates.
virtual void setRate(float newRate);
/// Linear transposer class that uses floating point arithmetics
class InterpolateLinearFloat : public TransposerBase
/// Fractional read position between two source frames; carried over between
/// calls.
float fract;
/// Resets the fractional source position.
virtual void resetRegisters();
/// Transposes mono audio; 'srcSamples' is updated to the number of consumed
/// source samples and the number of produced samples is returned.
virtual int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved stereo audio; same contract as transposeMono,
/// counted in frames.
virtual int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved multi-channel audio; 'srcSamples' is updated to
/// the number of consumed source frames.
virtual int transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, int &srcSamples);

/// Author : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
// $Id: InterpolateShannon.cpp 195 2014-04-06 15:57:21Z oparviai $
// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <math.h>
#include "InterpolateShannon.h"
#include "STTypes.h"
using namespace soundtouch;
/// Kaiser window with beta = 2.0
/// Values scaled down by 5% to avoid overflows
static const double _kaiser8[8] =
fract = 0;
/// Resets the interpolation state: the fractional source position restarts
/// from zero.
void InterpolateShannon::resetRegisters()
fract = 0;
/// Approximation of pi used by the sinc() macro
#define PI 3.1415926536
/// Normalized sinc function sin(pi*x) / (pi*x). The macro divides by zero at
/// x == 0, so callers must special-case that point (sinc(0) = 1). The argument
/// is evaluated twice; do not pass expressions with side effects.
#define sinc(x) (sin(PI * (x)) / (PI * (x)))
/// Transpose mono audio using an 8-tap windowed-sinc interpolator.
///
/// \param pdest       output buffer; one sample is written per produced frame
/// \param psrc        source samples
/// \param srcSamples  in: number of source samples available;
///                    out: number of source samples consumed
/// \return number of produced output samples
int InterpolateShannon::transposeMono(SAMPLETYPE *pdest,
                    const SAMPLETYPE *psrc,
                    int &srcSamples)
{
    int i = 0;
    // each output reads psrc[0..7], so stop eight samples before the end of input
    int srcSampleEnd = srcSamples - 8;
    int srcCount = 0;

    while (srcCount < srcSampleEnd)
    {
        double out;
        assert(fract < 1.0);

        out  = psrc[0] * sinc(-3.0 - fract) * _kaiser8[0];
        out += psrc[1] * sinc(-2.0 - fract) * _kaiser8[1];
        out += psrc[2] * sinc(-1.0 - fract) * _kaiser8[2];
        // Centre tap: add exactly one contribution. sinc() divides by zero at
        // fract == 0, so that point is replaced by its limit value.
        if (fract < 1e-6)
        {
            out += psrc[3] * _kaiser8[3];     // sinc(0) = 1
        }
        else
        {
            out += psrc[3] * sinc(- fract) * _kaiser8[3];
        }
        out += psrc[4] * sinc( 1.0 - fract) * _kaiser8[4];
        out += psrc[5] * sinc( 2.0 - fract) * _kaiser8[5];
        out += psrc[6] * sinc( 3.0 - fract) * _kaiser8[6];
        out += psrc[7] * sinc( 4.0 - fract) * _kaiser8[7];

        pdest[i] = (SAMPLETYPE)out;
        i ++;

        // update position fraction
        fract += rate;
        // update whole positions: advance the source by the integer part and
        // keep only the fractional remainder in 'fract'
        int whole = (int)fract;
        fract -= whole;
        psrc += whole;
        srcCount += whole;
    }
    // report the number of consumed source samples back to the caller
    srcSamples = srcCount;
    return i;
}
/// Transpose stereo audio using an 8-tap windowed-sinc interpolator. Source
/// and destination hold two interleaved channels.
///
/// \param pdest       output buffer; two samples are written per produced frame
/// \param psrc        interleaved source frames
/// \param srcSamples  in: number of source frames available;
///                    out: number of source frames consumed
/// \return number of produced output frames
int InterpolateShannon::transposeStereo(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
int i;
// each output reads eight consecutive frames, so stop eight frames early
int srcSampleEnd = srcSamples - 8;
int srcCount = 0;
i = 0;
while (srcCount < srcSampleEnd)
// 'w' holds the weight of the current tap, shared by both channels
double out0, out1, w;
assert(fract < 1.0);
w = sinc(-3.0 - fract) * _kaiser8[0];
out0 = psrc[0] * w; out1 = psrc[1] * w;
w = sinc(-2.0 - fract) * _kaiser8[1];
out0 += psrc[2] * w; out1 += psrc[3] * w;
w = sinc(-1.0 - fract) * _kaiser8[2];
out0 += psrc[4] * w; out1 += psrc[5] * w;
// centre tap: avoid the division by zero inside sinc() near fract == 0
w = _kaiser8[3] * ((fract < 1e-5) ? 1.0 : sinc(- fract)); // sinc(0) = 1
out0 += psrc[6] * w; out1 += psrc[7] * w;
w = sinc( 1.0 - fract) * _kaiser8[4];
out0 += psrc[8] * w; out1 += psrc[9] * w;
w = sinc( 2.0 - fract) * _kaiser8[5];
out0 += psrc[10] * w; out1 += psrc[11] * w;
w = sinc( 3.0 - fract) * _kaiser8[6];
out0 += psrc[12] * w; out1 += psrc[13] * w;
w = sinc( 4.0 - fract) * _kaiser8[7];
out0 += psrc[14] * w; out1 += psrc[15] * w;
pdest[2*i] = (SAMPLETYPE)out0;
pdest[2*i+1] = (SAMPLETYPE)out1;
i ++;
// update position fraction
fract += rate;
// update whole positions: advance by whole frames (two samples each)
int whole = (int)fract;
fract -= whole;
psrc += 2*whole;
srcCount += whole;
// report the number of consumed source frames back to the caller
srcSamples = srcCount;
return i;
/// Transpose multi-channel audio. Not implemented for this interpolator: no
/// output is produced, 'srcSamples' is left unchanged and 0 is returned.
int InterpolateShannon::transposeMulti(SAMPLETYPE *pdest,
const SAMPLETYPE *psrc,
int &srcSamples)
// not implemented
return 0;

// License :
// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef _InterpolateShannon_H_
#define _InterpolateShannon_H_
#include "RateTransposer.h"
#include "STTypes.h"
namespace soundtouch
/// Sample rate transposer that interpolates with an 8-tap windowed-sinc
/// filter. Implements the TransposerBase interpolation hooks.
class InterpolateShannon : public TransposerBase
/// Resets the fractional source position.
void resetRegisters();
/// Transposes mono audio; 'srcSamples' is updated to the number of consumed
/// source samples and the number of produced samples is returned.
int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Transposes interleaved stereo audio; same contract as transposeMono,
/// counted in frames.
int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Multi-channel variant; the implementation is a stub that returns 0.
int transposeMulti(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples);
/// Fractional read position between two source frames; carried over between
/// calls.
float fract;

@ -192,11 +192,21 @@ double PeakFinder::getPeakCenter(const float *data, int peakpos) const
gp1 = findGround(data, peakpos, -1);
gp2 = findGround(data, peakpos, 1);
groundLevel = 0.5f * (data[gp1] + data[gp2]);
peakLevel = data[peakpos];
if (gp1 == gp2)
// avoid rounding errors when all are equal
assert(gp1 == peakpos);
cutLevel = groundLevel = peakLevel;
} else {
// get average of the ground levels
groundLevel = 0.5f * (data[gp1] + data[gp2]);
// calculate 70%-level of the peak
cutLevel = 0.70f * peakLevel + 0.30f * groundLevel;
// find mid-level crossings
crosspos1 = findCrossingLevel(data, cutLevel, peakpos, -1);
crosspos2 = findCrossingLevel(data, cutLevel, peakpos, 1);

View File

// SoundTouch audio processing library
// Copyright (c) Olli Parviainen
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <memory.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "RateTransposer.h"
#include "InterpolateLinear.h"
#include "InterpolateCubic.h"
#include "InterpolateShannon.h"
#include "AAFilter.h"
using namespace soundtouch;
// Define default interpolation algorithm here. The value is read by
// TransposerBase::newInstance() and can be changed with
// TransposerBase::setAlgorithm().
TransposerBase::ALGORITHM TransposerBase::algorithm = TransposerBase::CUBIC;
// Constructor. Registers 'outputBuffer' as the output of the FIFOProcessor
// base, enables the anti-alias filter by default and allocates the filter
// and the interpolating transposer.
RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
bUseAAFilter = true;
// Instantiates the anti-alias filter
pAAFilter = new AAFilter(64);
// transposer implementation is chosen by the static factory
pTransposer = TransposerBase::newInstance();
delete pAAFilter;
delete pTransposer;
/// Enables/disables the anti-alias filter: 'true' enables, 'false' disables.
/// The flag is consulted by processSamples().
void RateTransposer::enableAAFilter(bool newMode)
bUseAAFilter = newMode;
/// Returns 'true' if the anti-alias filter is enabled.
bool RateTransposer::isAAFilterEnabled() const
return bUseAAFilter;
/// Returns the anti-alias filter object allocated by the constructor. The
/// pointer remains owned by this RateTransposer.
AAFilter *RateTransposer::getAAFilter()
return pAAFilter;
// Sets new target rate. Normal rate = 1.0, smaller values represent slower
// rate, larger faster rates.
//
// NOTE(review): the two assignments below read as the branches of an
// if/else, but no 'else' and no use of 'fCutoff' or 'newRate' beyond the
// cutoff computation are visible here — confirm against the full file.
void RateTransposer::setRate(float newRate)
double fCutoff;
// design a new anti-alias filter
if (newRate > 1.0f)
fCutoff = 0.5f / newRate;
fCutoff = 0.5f * newRate;
// Adds 'nSamples' pcs of samples from the 'samples' memory position into
// the input of the object. The samples are handed to processSamples().
void RateTransposer::putSamples(const SAMPLETYPE *samples, uint nSamples)
processSamples(samples, nSamples);
// Transposes sample rate, applying the anti-alias filter to prevent folding
// when the filter is enabled. The incoming samples are appended to
// 'inputBuffer' and the processed result is stored to 'outputBuffer'; the
// function itself returns nothing.
//
// NOTE(review): the branch structure (else / early return) separating the
// three processing paths below is not visible here — confirm against the
// full file.
void RateTransposer::processSamples(const SAMPLETYPE *src, uint nSamples)
uint count;
if (nSamples == 0) return;
// Store samples to input buffer
inputBuffer.putSamples(src, nSamples);
// If anti-alias filter is turned off, simply transpose without applying
// the filter
if (bUseAAFilter == false)
count = pTransposer->transpose(outputBuffer, inputBuffer);
// Transpose with anti-alias filter
if (pTransposer->rate < 1.0f)
// If the parameter 'Rate' value is smaller than 1, first transpose
// the samples and then apply the anti-alias filter to remove aliasing.
// Transpose the samples, store the result to end of "midBuffer"
pTransposer->transpose(midBuffer, inputBuffer);
// Apply the anti-alias filter for transposed samples in midBuffer
pAAFilter->evaluate(outputBuffer, midBuffer);
// If the parameter 'Rate' value is larger than 1, first apply the
// anti-alias filter to remove high frequencies (prevent them from folding
// over the lower frequencies), then transpose.
// Apply the anti-alias filter for samples in inputBuffer
pAAFilter->evaluate(midBuffer, inputBuffer);
// Transpose the AA-filtered samples in "midBuffer"
pTransposer->transpose(outputBuffer, midBuffer);
// Sets the number of channels, 1 = mono, 2 = stereo. The count must be
// positive; nothing is done when the transposer already uses that count.
// NOTE(review): the statements that apply a changed channel count are not
// visible here — confirm against the full file.
void RateTransposer::setChannels(int nChannels)
assert(nChannels > 0);
if (pTransposer->numChannels == nChannels) return;
// Clears all the samples in the object
// NOTE(review): the function body is not visible here.
void RateTransposer::clear()
// Returns nonzero if there aren't any samples available for outputting:
// both the FIFOProcessor output and 'inputBuffer' must be empty.
int RateTransposer::isEmpty() const
int res;
res = FIFOProcessor::isEmpty();
// output side still holds samples -> not empty
if (res == 0) return 0;
// output side is empty; the result now depends on the input buffer
return inputBuffer.isEmpty();
// TransposerBase - Base class for interpolation
// static function to set interpolation algorithm. Stores the choice in the
// class-wide 'algorithm' variable, which newInstance() reads when creating a
// transposer.
void TransposerBase::setAlgorithm(TransposerBase::ALGORITHM a)
TransposerBase::algorithm = a;
// Transposes the sample rate of the samples in 'src', dispatching to the
// mono, stereo or multi-channel routine according to 'numChannels'.
// Returns the number of samples produced by the selected routine.
//
// NOTE(review): the 'else' in front of the multi-channel path and any
// buffer bookkeeping after the dispatch are not visible here — confirm
// against the full file.
int TransposerBase::transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src)
int numSrcSamples = src.numSamples();
// upper estimate of the output size: input length divided by the rate,
// plus a margin of 8
int sizeDemand = (int)((float)numSrcSamples / rate) + 8;
int numOutput;
SAMPLETYPE *psrc = src.ptrBegin();
SAMPLETYPE *pdest = dest.ptrEnd(sizeDemand);
if (numChannels == 1)
numOutput = transposeMono(pdest, psrc, numSrcSamples);
else if (numChannels == 2)
numOutput = transposeStereo(pdest, psrc, numSrcSamples);
assert(numChannels > 0);
numOutput = transposeMulti(pdest, psrc, numSrcSamples);
return numOutput;
numChannels = 0;
rate = 1.0f;
// Sets the number of interleaved channels used by transpose().
void TransposerBase::setChannels(int channels)
numChannels = channels;
// Sets the transposing rate that the interpolation routines add to the
// source position for each produced output frame.
void TransposerBase::setRate(float newRate)
rate = newRate;
// static factory function: creates a transposer object. The caller receives
// a heap-allocated instance.
//
// NOTE(review): the conditional (presumably a preprocessor switch between
// integer and floating point builds) separating the integer return from the
// 'switch', and the case label in front of the InterpolateShannon return,
// are not visible here — confirm against the full file.
TransposerBase *TransposerBase::newInstance()
// Notice: For integer arithmetics support only linear algorithm (due to simplest calculus)
return ::new InterpolateLinearInteger;
// floating point arithmetics: pick the implementation by 'algorithm'
switch (algorithm)
case LINEAR:
return new InterpolateLinearFloat;
case CUBIC:
return new InterpolateCubic;
return new InterpolateShannon;
return NULL;

@ -55,50 +55,71 @@
namespace soundtouch
/// Abstract base class for transposer implementations (linear, advanced vs integer, float etc)
/// Subclasses supply the per-channel-layout interpolation routines;
/// transpose() selects among them according to 'numChannels'.
class TransposerBase
/// Resets the interpolation state of the implementation.
virtual void resetRegisters() = 0;
/// Transposes mono audio; 'srcSamples' is in/out: available on entry,
/// consumed on return. Returns the number of produced samples.
virtual int transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples) = 0;
/// Transposes interleaved stereo audio; same contract, counted in frames.
virtual int transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples) = 0;
/// Transposes interleaved multi-channel audio; same contract, counted in
/// frames.
virtual int transposeMulti(SAMPLETYPE *dest,
const SAMPLETYPE *src,
int &srcSamples) = 0;
/// Interpolation algorithm used by newInstance(); shared by all instances.
static ALGORITHM algorithm;
/// Transposing rate; 1.0 keeps the original rate.
float rate;
/// Number of interleaved channels.
int numChannels;
virtual ~TransposerBase();
/// Transposes the samples of 'src' into 'dest'; returns the number of
/// produced samples.
virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src);
virtual void setRate(float newRate);
virtual void setChannels(int channels);
// static factory function
static TransposerBase *newInstance();
// static function to set interpolation algorithm
static void setAlgorithm(ALGORITHM a);
/// A common linear samplerate transposer class.
/// Note: Use function "RateTransposer::newInstance()" to create a new class
/// instance instead of the "new" operator; that function automatically
/// chooses a correct implementation depending on if integer or floating
/// arithmetics are to be used.
class RateTransposer : public FIFOProcessor
/// Anti-alias filter object
AAFilter *pAAFilter;
float fRate;
int numChannels;
TransposerBase *pTransposer;
/// Buffer for collecting samples to feed the anti-alias filter between
/// two batches
FIFOSampleBuffer storeBuffer;
FIFOSampleBuffer inputBuffer;
/// Buffer for keeping samples between transposing & anti-alias filter
FIFOSampleBuffer tempBuffer;
FIFOSampleBuffer midBuffer;
/// Output sample buffer
FIFOSampleBuffer outputBuffer;
BOOL bUseAAFilter;
bool bUseAAFilter;
virtual void resetRegisters() = 0;
virtual uint transposeStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples) = 0;
virtual uint transposeMono(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples) = 0;
inline uint transpose(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples);
void downsample(const SAMPLETYPE *src,
uint numSamples);
void upsample(const SAMPLETYPE *src,
uint numSamples);
/// Transposes sample rate by applying anti-alias filter to prevent folding.
/// Returns amount of samples returned in the "dest" buffer.
@ -107,34 +128,33 @@ protected:
void processSamples(const SAMPLETYPE *src,
uint numSamples);
virtual ~RateTransposer();
/// Operator 'new' is overloaded so that it automatically creates a suitable instance
/// depending on if we're to use integer or floating point arithmetics.
static void *operator new(size_t s);
// static void *operator new(size_t s);
/// Use this function instead of "new" operator to create a new instance of this class.
/// This function automatically chooses a correct implementation, depending on if
/// integer or floating point arithmetics are to be used.
static RateTransposer *newInstance();
// static RateTransposer *newInstance();
/// Returns the output buffer object
FIFOSamplePipe *getOutput() { return &outputBuffer; };
/// Returns the store buffer object
FIFOSamplePipe *getStore() { return &storeBuffer; };
// FIFOSamplePipe *getStore() { return &storeBuffer; };
/// Return anti-alias filter object
AAFilter *getAAFilter();
/// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
void enableAAFilter(BOOL newMode);
void enableAAFilter(bool newMode);
/// Returns nonzero if anti-alias filter is enabled.
BOOL isAAFilterEnabled() const;
bool isAAFilterEnabled() const;
/// Sets new target rate. Normal rate = 1.0, smaller values represent slower
/// rate, larger faster rates.

@ -97,7 +97,7 @@ SoundTouch::SoundTouch()
// Initialize rate transposer and tempo changer instances
pRateTransposer = RateTransposer::newInstance();
pRateTransposer = new RateTransposer();
pTDStretch = TDStretch::newInstance();
@ -111,7 +111,7 @@ SoundTouch::SoundTouch()
channels = 0;
bSrateSet = FALSE;
bSrateSet = false;
@ -143,10 +143,11 @@ uint SoundTouch::getVersionId()
// Sets the number of channels, 1 = mono, 2 = stereo
void SoundTouch::setChannels(uint numChannels)
if (numChannels != 1 && numChannels != 2)
/*if (numChannels != 1 && numChannels != 2)
ST_THROW_RT_ERROR("Illegal number of channels");
//ST_THROW_RT_ERROR("Illegal number of channels");
channels = numChannels;
@ -254,7 +255,7 @@ void SoundTouch::calcEffectiveRateAndTempo()
tempoOut = pTDStretch->getOutput();
// move samples in pitch transposer's store buffer to tempo changer's input
// deprecated : pTDStretch->moveSamples(*pRateTransposer->getStore());
output = pTDStretch;
@ -282,7 +283,7 @@ void SoundTouch::calcEffectiveRateAndTempo()
// Sets sample rate.
void SoundTouch::setSampleRate(uint srate)
bSrateSet = TRUE;
bSrateSet = true;
// set sample rate, leave other tempo changer parameters as they are.
@ -292,7 +293,7 @@ void SoundTouch::setSampleRate(uint srate)
// the input of the object.
void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
if (bSrateSet == FALSE)
if (bSrateSet == false)
ST_THROW_RT_ERROR("SoundTouch : Sample rate not defined");
@ -347,7 +348,7 @@ void SoundTouch::flush()
int i;
int nUnprocessed;
int nOut;
SAMPLETYPE buff[64*2]; // note: allocate 2*64 to cater 64 sample frames of stereo sound
SAMPLETYPE *buff = new SAMPLETYPE[64 * channels];
// check how many samples still await processing, and scale
// that by tempo & rate to get expected output sample count
@ -377,6 +378,8 @@ void SoundTouch::flush()
delete[] buff;
// Clear working buffers
@ -387,7 +390,7 @@ void SoundTouch::flush()
// Changes a setting controlling the processing system behaviour. See the
// 'SETTING_...' defines for available setting ID's.
BOOL SoundTouch::setSetting(int settingId, int value)
bool SoundTouch::setSetting(int settingId, int value)
int sampleRate, sequenceMs, seekWindowMs, overlapMs;
@ -398,36 +401,36 @@ BOOL SoundTouch::setSetting(int settingId, int value)
// enables / disables anti-alias filter
pRateTransposer->enableAAFilter((value != 0) ? TRUE : FALSE);
return TRUE;
pRateTransposer->enableAAFilter((value != 0) ? true : false);
return true;
// sets anti-alias filter length
return TRUE;
return true;
// enables / disables tempo routine quick seeking algorithm
pTDStretch->enableQuickSeek((value != 0) ? TRUE : FALSE);
return TRUE;
pTDStretch->enableQuickSeek((value != 0) ? true : false);
return true;
// change time-stretch sequence duration parameter
pTDStretch->setParameters(sampleRate, value, seekWindowMs, overlapMs);
return TRUE;
return true;
// change time-stretch seek window length parameter
pTDStretch->setParameters(sampleRate, sequenceMs, value, overlapMs);
return TRUE;
return true;
// change time-stretch overlap length parameter
pTDStretch->setParameters(sampleRate, sequenceMs, seekWindowMs, value);
return TRUE;
return true;
default :
return FALSE;
return false;

View File

@ -13,10 +13,10 @@
// Last changed : $Date: 2012-11-08 16:53:01 -0200 (qui, 08 nov 2012) $
// Last changed : $Date: 2015-02-22 15:07:12 +0000 (Sun, 22 Feb 2015) $
// File revision : $Revision: 1.12 $
// $Id: TDStretch.cpp 160 2012-11-08 18:53:01Z oparviai $
// $Id: TDStretch.cpp 205 2015-02-22 15:07:12Z oparviai $
@ -51,8 +51,6 @@
#include "cpu_detect.h"
#include "TDStretch.h"
#include <stdio.h>
using namespace soundtouch;
#define max(x, y) (((x) > (y)) ? (x) : (y))
@ -86,15 +84,15 @@ static const short _scanOffsets[5][24]={
TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
bQuickSeek = FALSE;
bQuickSeek = false;
channels = 2;
pMidBuffer = NULL;
pMidBufferUnaligned = NULL;
overlapLength = 0;
bAutoSeqSetting = TRUE;
bAutoSeekSetting = TRUE;
bAutoSeqSetting = true;
bAutoSeekSetting = true;
// outDebt = 0;
skipFract = 0;
@ -134,23 +132,23 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
if (aSequenceMS > 0)
this->sequenceMs = aSequenceMS;
bAutoSeqSetting = FALSE;
bAutoSeqSetting = false;
else if (aSequenceMS == 0)
// if zero, use automatic setting
bAutoSeqSetting = TRUE;
bAutoSeqSetting = true;
if (aSeekWindowMS > 0)
this->seekWindowMs = aSeekWindowMS;
bAutoSeekSetting = FALSE;
bAutoSeekSetting = false;
else if (aSeekWindowMS == 0)
// if zero, use automatic setting
bAutoSeekSetting = TRUE;
bAutoSeekSetting = true;
@ -159,7 +157,6 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
// set tempo to recalculate 'sampleReq'
@ -212,7 +209,7 @@ void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
void TDStretch::clearMidBuffer()
memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
@ -234,14 +231,14 @@ void TDStretch::clear()
// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
// to enable
void TDStretch::enableQuickSeek(BOOL enable)
void TDStretch::enableQuickSeek(bool enable)
bQuickSeek = enable;
// Returns nonzero if the quick seeking algorithm is enabled.
BOOL TDStretch::isQuickSeekEnabled() const
bool TDStretch::isQuickSeekEnabled() const
return bQuickSeek;
@ -265,13 +262,22 @@ int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
// of 'ovlPos'.
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
if (channels == 2)
if (channels == 1)
// mono sound.
overlapMono(pOutput, pInput + ovlPos);
else if (channels == 2)
// stereo sound
overlapStereo(pOutput, pInput + 2 * ovlPos);
} else {
// mono sound.
overlapMono(pOutput, pInput + ovlPos);
assert(channels > 0);
overlapMulti(pOutput, pInput + channels * ovlPos);
@ -286,30 +292,50 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
int bestOffs;
double bestCorr, corr;
double bestCorr;
int i;
double norm;
bestCorr = FLT_MIN;
bestOffs = 0;
// Scans for the best correlation value by testing each possible position
// over the permitted range.
for (i = 0; i < seekLength; i ++)
bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
#pragma omp parallel for
for (i = 1; i < seekLength; i ++)
// Calculates correlation value for the mixing position corresponding
// to 'i'
corr = calcCrossCorr(refPos + channels * i, pMidBuffer);
double corr;
// Calculates correlation value for the mixing position corresponding to 'i'
#ifdef _OPENMP
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
// iterate the loop in sequential order
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
// as "calcCrossCorr", but saves time by reusing & updating previously stored
// "norm" value
corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
// heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
// Checks for the highest correlation value
if (corr > bestCorr)
// For optimal performance, enter the critical section only when a new best value is found.
// In that case, repeat the 'if' condition inside it, as parallel execution may have
// updated the bestCorr value in the meantime.
#pragma omp critical
if (corr > bestCorr)
bestCorr = corr;
bestOffs = i;
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
@ -346,12 +372,13 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
j = 0;
while (_scanOffsets[scanCount][j])
double norm;
tempOffset = corrOffset + _scanOffsets[scanCount][j];
if (tempOffset >= seekLength) break;
// Calculates correlation value for the mixing position corresponding
// to 'tempOffset'
corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer);
corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
// heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
@ -458,11 +485,15 @@ void TDStretch::setChannels(int numChannels)
assert(numChannels > 0);
if (channels == numChannels) return;
assert(numChannels == 1 || numChannels == 2);
// assert(numChannels == 1 || numChannels == 2);
channels = numChannels;
// re-init overlap/buffer
@ -498,7 +529,6 @@ void TDStretch::processNominalTempo()
#include <stdio.h>
// Processes as many processing frames of the samples 'inputBuffer', store
// the result into 'outputBuffer'
@ -588,7 +618,7 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
delete[] pMidBufferUnaligned;
pMidBufferUnaligned = new SAMPLETYPE[overlapLength * 2 + 16 / sizeof(SAMPLETYPE)];
pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
// ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
@ -666,6 +696,27 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
// version of the routine, for an arbitrary channel count: the inner loop below
// runs once per channel for every overlap step.
// NOTE(review): the declarations of 'm1'/'m2' and the statements advancing 'i'
// and 'm1' are not visible in this excerpt - confirm against the full source.
void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
int i=0;
// 'm2' counts down from overlapLength and the loop ends when it reaches zero
for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
for (int c = 0; c < channels; c ++)
// weighted mix of the new input (weight m1) and the stored mid buffer
// (weight m2), scaled back down by overlapLength
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
// Calculates the x having the closest 2^x value for the given value
static int _getClosest2Power(double value)
@ -699,32 +750,72 @@ void TDStretch::calculateOverlapLength(int aoverlapMs)
double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) const
double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
long corr;
long norm;
long lnorm;
int i;
corr = norm = 0;
corr = lnorm = 0;
// Same routine for stereo and mono. For stereo, unrolling the loop gives better
// efficiency and slightly better resolution against rounding.
// For mono it is the same routine, just unrolling the loop by a factor of 4.
for (i = 0; i < channels * overlapLength; i += 4)
corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1] +
mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
norm += (mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1] +
mixingPos[i + 2] * mixingPos[i + 2] +
lnorm += (mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
if (norm == 0) norm = 1; // to avoid div by zero
return (double)corr / sqrt((double)norm);
norm = (double)lnorm;
return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
/// Update cross-correlation by accumulating the "norm" coefficient onto the
/// previously calculated value.
///
/// The correlation sum between 'mixingPos' and 'compare' is computed in full,
/// but the normalizer is not re-summed: 'norm' is an in/out argument that is
/// adjusted by subtracting the squares of the 'channels' samples located just
/// before 'mixingPos' and adding the squares of 'channels' samples at the end
/// of the range scanned by the correlation loop.
///
/// Returns the correlation sum divided by sqrt(norm); 1.0 is used in place of
/// 'norm' when it is below 1e-9, to avoid division by zero.
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
long corr;
long lnorm;
int i;
// cancel first normalizer tap from previous round
// ('lnorm' collects the net change that is applied to 'norm' further below;
// note the reads at negative offsets mixingPos[-channels] .. mixingPos[-1])
lnorm = 0;
for (i = 1; i <= channels; i ++)
lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
corr = 0;
// Same routine for stereo and mono. For stereo, unrolling the loop gives better
// efficiency and slightly better resolution against rounding.
// For mono it is the same routine, just unrolling the loop by a factor of 4.
for (i = 0; i < channels * overlapLength; i += 4)
corr += (mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
corr += (mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
// update normalizer with last samples of this round
// ('i' continues from where the correlation loop above left it and is
// stepped backwards one sample at a time)
for (int j = 0; j < channels; j ++)
i --;
lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
// apply the net change to the caller's running normalizer
norm += (double)lnorm;
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
@ -760,6 +851,34 @@ void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
// Overlaps samples in 'midBuffer' with the samples in 'input'.
// Float version for an arbitrary channel count: the input is weighted by 'f1',
// which starts at 0 and grows by 1/overlapLength, while the mid buffer is
// weighted by 'f2', which starts at 1 and shrinks by the same step.
// NOTE(review): the initialisation and advancing of 'i' are not visible in this
// excerpt - confirm against the full source.
void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
int i;
float fScale;
float f1;
float f2;
// per-step change of the two mixing weights
fScale = 1.0f / (float)overlapLength;
f1 = 0;
f2 = 1.0f;
for (int i2 = 0; i2 < overlapLength; i2 ++)
// note: Could optimize this slightly by taking into account that always channels > 2
for (int c = 0; c < channels; c ++)
pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
f1 += fScale;
f2 -= fScale;
/// Calculates overlapInMsec period length in samples.
void TDStretch::calculateOverlapLength(int overlapInMsec)
@ -776,7 +895,8 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) const
/// Calculate cross-correlation
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const
double corr;
double norm;
@ -801,8 +921,44 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) co
mixingPos[i + 3] * mixingPos[i + 3];
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
return corr / sqrt(norm);
anorm = norm;
return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
/// Update cross-correlation by accumulating the "norm" coefficient onto the
/// previously calculated value.
///
/// Float version. 'norm' is an in/out argument and is modified directly: the
/// squares of the 'channels' samples just before 'mixingPos' are subtracted,
/// and the squares of 'channels' samples at the end of the range scanned by
/// the correlation loop are added.
///
/// Returns the correlation sum divided by sqrt(norm); 1.0 is used in place of
/// 'norm' when it is below 1e-9.
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
double corr;
int i;
corr = 0;
// cancel first normalizer tap from previous round
// (reads the samples at negative offsets mixingPos[-channels] .. mixingPos[-1])
for (i = 1; i <= channels; i ++)
norm -= mixingPos[-i] * mixingPos[-i];
// Same routine for stereo and mono. For stereo, unroll by factor of 2.
// For mono it's the same routine yet unrolled by factor of 4.
for (i = 0; i < channels * overlapLength; i += 4)
corr += mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1] +
mixingPos[i + 2] * compare[i + 2] +
mixingPos[i + 3] * compare[i + 3];
// update normalizer with last samples of this round
// ('i' continues from where the correlation loop above left it and is
// stepped backwards one sample at a time)
for (int j = 0; j < channels; j ++)
i --;
norm += mixingPos[i] * mixingPos[i];
// guard against division by (near) zero
return corr / sqrt((norm < 1e-9 ? 1.0 : norm));

@ -125,21 +125,22 @@ protected:
float skipFract;
FIFOSampleBuffer outputBuffer;
FIFOSampleBuffer inputBuffer;
BOOL bQuickSeek;
bool bQuickSeek;
int sampleRate;
int sequenceMs;
int seekWindowMs;
int overlapMs;
BOOL bAutoSeqSetting;
BOOL bAutoSeekSetting;
bool bAutoSeqSetting;
bool bAutoSeekSetting;
void acceptNewOverlapLength(int newOverlapLength);
virtual void clearCrossCorrState();
void calculateOverlapLength(int overlapMs);
virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const;
virtual double calcCrossCorrAccumulate(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare, double &norm) const;
virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos);
virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos);
@ -147,6 +148,7 @@ protected:
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const;
virtual void overlapMulti(SAMPLETYPE *output, const SAMPLETYPE *input) const;
void clearMidBuffer();
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
@ -193,10 +195,10 @@ public:
/// Enables/disables the quick position seeking algorithm. Zero to disable,
/// nonzero to enable
void enableQuickSeek(BOOL enable);
void enableQuickSeek(bool enable);
/// Returns nonzero if the quick seeking algorithm is enabled.
BOOL isQuickSeekEnabled() const;
bool isQuickSeekEnabled() const;
/// Sets routine control parameters. These control are certain time constants
/// defining how the sound is stretched to the desired duration.
@ -247,7 +249,8 @@ public:
class TDStretchMMX : public TDStretch
double calcCrossCorr(const short *mixingPos, const short *compare) const;
double calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const;
double calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const;
virtual void overlapStereo(short *output, const short *input) const;
virtual void clearCrossCorrState();
@ -259,7 +262,8 @@ public:
class TDStretchSSE : public TDStretch
double calcCrossCorr(const float *mixingPos, const float *compare) const;
double calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const;
double calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const;

View File

@ -11,10 +11,10 @@
// Last changed : $Date: 2012-11-08 16:44:37 -0200 (qui, 08 nov 2012) $
// Last changed : $Date: 2014-01-07 18:24:28 +0000 (Tue, 07 Jan 2014) $
// File revision : $Revision: 4 $
// $Id: cpu_detect_x86.cpp 159 2012-11-08 18:44:37Z oparviai $
// $Id: cpu_detect_x86.cpp 183 2014-01-07 18:24:28Z oparviai $
@ -42,6 +42,7 @@
#include "cpu_detect.h"
#include "STTypes.h"
#if defined(__GNUC__) && defined(__i386__)
@ -50,13 +51,13 @@
#elif defined(_M_IX86)
// windows non-gcc
#include <intrin.h>
#define bit_MMX (1 << 23)
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)

@ -68,7 +68,7 @@ using namespace soundtouch;
// Calculates cross correlation of two buffers
double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2, double &dnorm) const
const __m64 *pVec1, *pVec2;
__m64 shifter;
@ -93,19 +93,19 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
// _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
_mm_madd_pi16(pVec1[1], pVec2[1]));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
_mm_madd_pi16(pVec1[1], pVec1[1]));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec1[1]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
_mm_madd_pi16(pVec1[3], pVec2[3]));
temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
_mm_madd_pi16(pVec1[3], pVec1[3]));
accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
temp2 = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec1[3]), shifter));
accu = _mm_add_pi32(accu, temp);
normaccu = _mm_add_pi32(normaccu, temp2);
pVec1 += 4;
pVec2 += 4;
@ -125,14 +125,81 @@ double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
if (norm == 0) norm = 1; // to avoid div by zero
dnorm = (double)norm;
return (double)corr / sqrt((double)norm);
return (double)corr / sqrt(dnorm < 1e-9 ? 1.0 : dnorm);
// Note: Warning about the missing EMMS instruction is harmless
// as it'll be called elsewhere.
/// Update cross-correlation by accumulating the "norm" coefficient onto the
/// previously calculated value.
///
/// MMX version. Only the correlation sum is computed with MMX intrinsics; the
/// normalizer is adjusted with scalar code: 'dnorm' is an in/out argument from
/// which the squares of the 'channels' samples just before 'pV1' are removed
/// and to which the squares of the last 'channels' samples processed by the
/// MMX loop are added.
///
/// Returns the correlation sum divided by sqrt(dnorm); 1.0 is used in place of
/// 'dnorm' when it is below 1e-9.
double TDStretchMMX::calcCrossCorrAccumulate(const short *pV1, const short *pV2, double &dnorm) const
const __m64 *pVec1, *pVec2;
__m64 shifter;
__m64 accu;
long corr, lnorm;
int i;
// cancel first normalizer tap from previous round
// ('lnorm' collects the net change that is applied to 'dnorm' further below;
// note the reads at negative offsets pV1[-channels] .. pV1[-1])
lnorm = 0;
for (i = 1; i <= channels; i ++)
lnorm -= (pV1[-i] * pV1[-i]) >> overlapDividerBits;
pVec1 = (__m64*)pV1;
pVec2 = (__m64*)pV2;
shifter = _m_from_int(overlapDividerBits);
accu = _mm_setzero_si64();
// Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
// during each round for improved CPU-level parallelization.
// Each round consumes four __m64 vectors (16 samples of 16 bits) per buffer,
// hence the loop count of channels * overlapLength / 16.
for (i = 0; i < channels * overlapLength / 16; i ++)
__m64 temp;
// dictionary of instructions:
// _m_pmaddwd : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
// _mm_add_pi32 : 2*32bit add
// _m_psrad : 32bit right-shift
// (the code below uses the _mm_madd_pi16 / _mm_sra_pi32 spellings of the
// multiply-add and shift; each product pair is shifted before it is added)
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[1], pVec2[1]), shifter));
accu = _mm_add_pi32(accu, temp);
temp = _mm_add_pi32(_mm_sra_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]), shifter),
_mm_sra_pi32(_mm_madd_pi16(pVec1[3], pVec2[3]), shifter));
accu = _mm_add_pi32(accu, temp);
pVec1 += 4;
pVec2 += 4;
// copy hi-dword of mm0 to lo-dword of mm1, then sum mm0+mm1
// and finally store the result into the variable "corr"
accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
corr = _m_to_int(accu);
// Clear MMX state
// NOTE(review): the statement that clears the state is not visible in this
// excerpt - confirm against the full source.
// update normalizer with last samples of this round
// ('pVec1' now points just past the last vector processed, so the negative
// offsets below address the final 'channels' samples of the scanned range)
pV1 = (short *)pVec1;
for (int j = 1; j <= channels; j ++)
lnorm += (pV1[-j] * pV1[-j]) >> overlapDividerBits;
// apply the net change to the caller's running normalizer
dnorm += (double)lnorm;
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
return (double)corr / sqrt((dnorm < 1e-9) ? 1.0 : dnorm);
void TDStretchMMX::clearCrossCorrState()
@ -220,6 +287,7 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
FIRFilterMMX::FIRFilterMMX() : FIRFilter()
filterCoeffsAlign = NULL;
filterCoeffsUnalign = NULL;

@ -71,7 +71,7 @@ using namespace soundtouch;
#include <math.h>
// Calculates cross correlation of two buffers
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const
int i;
const float *pVec1;
@ -141,11 +141,11 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
// return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
float *pvNorm = (float*)&vNorm;
double norm = sqrt(pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
float norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
anorm = norm;
float *pvSum = (float*)&vSum;
return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / norm;
return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / sqrt(norm < 1e-9 ? 1.0 : norm);
/* This is approximately corresponding routine in C-language yet without normalization:
double corr, norm;
@ -182,6 +182,16 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
/// SSE variant of the accumulating cross-correlation. It does not roll the
/// normalizer forward; it forwards to calcCrossCorr() with the same arguments
/// and returns its result.
double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) const
// call usual calcCrossCorr function because SSE does not show big benefit of
// accumulating "norm" value, and also the "norm" rolling algorithm would get
// complicated due to SSE-specific alignment-vs-nonexact correlation rules.
return calcCrossCorr(pV1, pV2, norm);
// implementation of SSE optimized functions of class 'FIRFilter'
@ -249,14 +259,17 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
assert(((ulongptr)filterCoeffsAlign) % 16 == 0);
// filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
#pragma omp parallel for
for (j = 0; j < count; j += 2)
const float *pSrc;
float *pDest;
const __m128 *pFil;
__m128 sum1, sum2;
uint i;
pSrc = (const float*)source; // source audio data
pSrc = (const float*)source + j * 2; // source audio data
pDest = dest + j * 2; // destination audio data
pFil = (const __m128*)filterCoeffsAlign; // filter coefficients. NOTE: Assumes coefficients
// are aligned to 16-byte boundary
sum1 = sum2 = _mm_setzero_ps();
@ -289,12 +302,10 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
// to sum the two hi- and lo-floats of these registers together.
// post-shuffle & add the filtered values and store to dest.
_mm_storeu_ps(dest, _mm_add_ps(
_mm_storeu_ps(pDest, _mm_add_ps(
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)), // s2_1 s2_0 s1_3 s1_2
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0)) // s2_3 s2_2 s1_1 s1_0
source += 4;
dest += 4;
// Ideas for further improvement:

{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|Win32.Build.0 = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.ActiveCfg = Debug|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.Build.0 = Debug|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|Win32.ActiveCfg = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|Win32.Build.0 = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.ActiveCfg = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.ActiveCfg = Devel|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.Build.0 = Devel|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX|x64.Build.0 = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release AVX2|x64.Build.0 = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSE4|x64.Build.0 = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSSE3|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSSE3|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSSE3|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSSE3|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release SSSE3|x64.Build.0 = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.Build.0 = Release|x64
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|Win32.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|Win32.Build.0 = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|x64.ActiveCfg = Debug|Win32

{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.ActiveCfg = Debug|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.ActiveCfg = Debug|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Debug|x64.Build.0 = Debug|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|Win32.ActiveCfg = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|Win32.Build.0 = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.ActiveCfg = Devel|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.ActiveCfg = Devel|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Devel|x64.Build.0 = Devel|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|Win32.Build.0 = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|Win32
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.ActiveCfg = Release|x64
{E9B51944-7E6D-4BCD-83F2-7BBD5A46182D}.Release|x64.Build.0 = Release|x64
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|Win32.ActiveCfg = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|Win32.Build.0 = Debug|Win32
{2F6C0388-20CB-4242-9F6C-A6EBB6A83F47}.Debug|x64.ActiveCfg = Debug|Win32

View File

@ -31,7 +31,7 @@
#ifdef __linux__
#include "WavFile.h"
#include "soundtouch/WavFile.h"
#include "soundtouch/source/SoundStretch/WavFile.h"
char libraryName[256];

s32 g_logsound = 0;