mirror of https://github.com/PCSX2/pcsx2.git
3rdparty: Upgrade soundtouch lib to 2.3.1
This commit is contained in:
parent
791f2a63ac
commit
e37afd6976
|
@ -2,7 +2,7 @@
|
||||||
Version 2.1, February 1999
|
Version 2.1, February 1999
|
||||||
|
|
||||||
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||||
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
of this license document, but changing it is not allowed.
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ be combined with the library in order to run.
|
||||||
|
|
||||||
0. This License Agreement applies to any software library or other
|
0. This License Agreement applies to any software library or other
|
||||||
program which contains a notice placed by the copyright holder or
|
program which contains a notice placed by the copyright holder or
|
||||||
other authoried party saying it may be distributed under the terms of
|
other authorized party saying it may be distributed under the terms of
|
||||||
this Lesser General Public License (also called "this License").
|
this Lesser General Public License (also called "this License").
|
||||||
Each licensee is addressed as "you".
|
Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,22 @@
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||||
<html>
|
<html>
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<title>SoundTouch library README</title>
|
<title>SoundTouch library README</title>
|
||||||
<meta http-equiv="Content-Type"
|
|
||||||
content="text/html; charset=windows-1252">
|
|
||||||
<meta http-equiv="Content-Language" content="en-us">
|
<meta http-equiv="Content-Language" content="en-us">
|
||||||
<meta name="author" content="Olli Parviainen">
|
<meta name="author" content="Olli Parviainen">
|
||||||
<meta name="description"
|
<meta name="description" content="Readme file for SoundTouch audio processing library">
|
||||||
content="Readme file for SoundTouch audio processing library">
|
|
||||||
<style>
|
<style>
|
||||||
body {font-family: Arial, Helvetica; }
|
body {
|
||||||
|
font-family: Arial, Helvetica;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body class="normal">
|
<body class="normal">
|
||||||
<hr>
|
<hr>
|
||||||
<h1>SoundTouch audio processing library v2.1.2</h1>
|
<h1>SoundTouch audio processing library v2.3.1</h1>
|
||||||
<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2018</p>
|
<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2021</p>
|
||||||
<hr>
|
<hr>
|
||||||
<h2>1. Introduction </h2>
|
<h2>1. Introduction </h2>
|
||||||
<p>SoundTouch is an open-source audio processing library that allows
|
<p>SoundTouch is an open-source audio processing library that allows
|
||||||
|
@ -33,11 +34,12 @@ same time</li>
|
||||||
<h3>1.1 Contact information </h3>
|
<h3>1.1 Contact information </h3>
|
||||||
<p>Author email: oparviai 'at' iki.fi </p>
|
<p>Author email: oparviai 'at' iki.fi </p>
|
||||||
<p>SoundTouch WWW page: <a href="http://soundtouch.surina.net">http://soundtouch.surina.net</a></p>
|
<p>SoundTouch WWW page: <a href="http://soundtouch.surina.net">http://soundtouch.surina.net</a></p>
|
||||||
<p>SoundTouch git repository: <a href="https://gitlab.com/soundtouch/soundtouch.git">https://gitlab.com/soundtouch/soundtouch.git</a></p>
|
<p>SoundTouch git repository: <a
|
||||||
|
href="https://gitlab.com/soundtouch/soundtouch.git">https://gitlab.com/soundtouch/soundtouch.git</a></p>
|
||||||
<hr>
|
<hr>
|
||||||
<h2>2. Compiling SoundTouch</h2>
|
<h2>2. Compiling SoundTouch</h2>
|
||||||
<p>Before compiling, notice that you can choose the sample data format if it's
|
<p>Before compiling, notice that you can choose the sample data format if it's
|
||||||
desirable to use floating point sample data instead of 16bit integers. See
|
desirable to use 16bit integer sample data instead of floating point samples. See
|
||||||
section "sample data format" for more information.</p>
|
section "sample data format" for more information.</p>
|
||||||
<p>Also notice that SoundTouch can use OpenMP instructions for parallel
|
<p>Also notice that SoundTouch can use OpenMP instructions for parallel
|
||||||
computation to accelerate the runtime processing speed in multi-core systems,
|
computation to accelerate the runtime processing speed in multi-core systems,
|
||||||
|
@ -70,15 +72,17 @@ folders.</p>
|
||||||
<li>x64 64bit: C:\Program Files (x86)\Microsoft Visual Studio
|
<li>x64 64bit: C:\Program Files (x86)\Microsoft Visual Studio
|
||||||
9.0\VC\redist\amd64\Microsoft.VC90.OPENMP\vcomp90.dll</li>
|
9.0\VC\redist\amd64\Microsoft.VC90.OPENMP\vcomp90.dll</li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>In Visual Studio 2008, a SP1 version may be required for these libraries. In
|
<p>In other VC++ versions the required library will be expectedly found in similar
|
||||||
other VC++ versions the required library will be expectedly found in similar
|
|
||||||
"redist" location.</p>
|
"redist" location.</p>
|
||||||
<p>Notice that as minor demonstration of a "dll hell" phenomenon both the 32-bit
|
<p>Notice that as minor demonstration of a "dll hell" phenomenon both the 32-bit
|
||||||
and 64-bit version of vcomp90.dll have the same filename but different contents,
|
and 64-bit version of vcomp90.dll have the same filename but different contents,
|
||||||
thus choose the proper version to allow the program start.</p>
|
thus choose the proper version to allow the program to start.</p>
|
||||||
<h3>2.2. Building in Gnu platforms</h3>
|
<h3>2.2. Building in Gnu platforms</h3>
|
||||||
<p>The SoundTouch library compiles in practically any platform
|
<p>The SoundTouch library compiles in practically any platform
|
||||||
supporting GNU compiler (GCC) tools. SoundTouch requires GCC version 4.3 or later.</p>
|
supporting GNU compiler (GCC) tools.</p>
|
||||||
|
<h4>2.2.1 Compiling with autotools</h4>
|
||||||
|
<p>To install build prerequisites for 'autotools' tool chain:</p>
|
||||||
|
<pre> sudo apt-get install automake autoconf libtool build-essential</pre>
|
||||||
<p>To build and install the binaries, run the following commands in
|
<p>To build and install the binaries, run the following commands in
|
||||||
/soundtouch directory:</p>
|
/soundtouch directory:</p>
|
||||||
<table border="0" cellpadding="0" cellspacing="4">
|
<table border="0" cellpadding="0" cellspacing="4">
|
||||||
|
@ -125,41 +129,27 @@ destination locations.</p>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
<h4><b>2.2.1 Required GNU tools</b></h4>
|
|
||||||
<p> <span style="font-weight: bold;">Bash shell</span>, <span
|
|
||||||
style="font-weight: bold;">GNU C++ compiler</span>, <span
|
|
||||||
style="font-weight: bold;">libtool</span>, <span
|
|
||||||
style="font-weight: bold;">autoconf</span> and <span
|
|
||||||
style="font-weight: bold;">automake</span> tools
|
|
||||||
are required for compiling the SoundTouch library. These are usually
|
|
||||||
included with the GNU/Linux distribution, but if not, install these
|
|
||||||
packages first. For example, Ubuntu Linux can acquire and install
|
|
||||||
these with the following command:</p>
|
|
||||||
<pre><b>sudo apt-get install automake autoconf libtool build-essential</b></pre>
|
|
||||||
<h4><b>2.2.2 Problems with GCC compiler compatibility</b></h4>
|
|
||||||
<p>At the release time the SoundTouch package has been tested to
|
|
||||||
compile in GNU/Linux platform. However, If you have problems getting the
|
|
||||||
SoundTouch library compiled, try disabling optimizations that are specific for
|
|
||||||
x86 processors by running <b>./configure</b> script with switch
|
|
||||||
<blockquote>
|
|
||||||
<pre>--enable-x86-optimizations=no</pre>
|
|
||||||
</blockquote>
|
|
||||||
|
|
||||||
Alternatively, if you don't use GNU Configure system, edit file "include/STTypes.h"
|
<b>Compiling portable Shared Library / DLL version</b>
|
||||||
directly and remove the following definition:<blockquote>
|
<p> The GNU autotools compilation does not automatically create a shared-library version of
|
||||||
<pre>#define SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS 1</pre>
|
SoundTouch (.so or .dll) that features position-independent code and C-language
|
||||||
</blockquote>
|
api that are more suitable for cross-language development than C++ libraries.</p>
|
||||||
|
<p> Use script "make-gnu-dll-sh" to build a portable dynamic library version if such is desired.</p>
|
||||||
|
|
||||||
<h4><b>2.2.3 Compiling Shared Library / DLL version in Cygwin</b></h4>
|
<h4><b>2.2.2 Compiling with cmake</b></h4>
|
||||||
<p>
|
<p>'cmake' build scripts are provided as an alternative to the autotools toolchain.</p>
|
||||||
The GNU compilation does not automatically create a shared-library version of
|
<p>To install cmake build prerequisites:</p>
|
||||||
SoundTouch (.so or .dll). If such is desired, then you can create it as follows
|
<pre> sudo apt-get install libtool build-essential cmake</pre>
|
||||||
after running the usual compilation:</p>
|
<p>To build:</p>
|
||||||
<blockquote>
|
<pre>
|
||||||
<pre>g++ -shared -static -DDLL_EXPORTS -I../../include -o SoundTouch.dll \
|
cmake .
|
||||||
SoundTouchDLL.cpp ../SoundTouch/.libs/libSoundTouch.a
|
make -j
|
||||||
sstrip SoundTouch.dll</pre>
|
make install</pre>
|
||||||
</blockquote>
|
<p>To compile the additional portable Shared Library / DLL version with the native C-language API:</p>
|
||||||
|
<pre>
|
||||||
|
cmake . -DSOUNDTOUCH_DLL=ON
|
||||||
|
make -j
|
||||||
|
make install</pre>
|
||||||
|
|
||||||
<h3>2.3. Building in Android</h3>
|
<h3>2.3. Building in Android</h3>
|
||||||
<p>Android compilation instructions are within the
|
<p>Android compilation instructions are within the
|
||||||
|
@ -174,18 +164,26 @@ library binary version to use.</p>
|
||||||
example application that processes WAV audio files using SoundTouch library in
|
example application that processes WAV audio files using SoundTouch library in
|
||||||
Android devices.</p>
|
Android devices.</p>
|
||||||
|
|
||||||
|
<h3>2.4. Building in Mac</h3>
|
||||||
|
<p>Install autoconf tool as instructed in <a
|
||||||
|
href="http://macappstore.org/autoconf/">http://macappstore.org/autoconf/</a>, or alternatively the 'cmake' toolchain.</p>
|
||||||
|
<p>Then, build as described above in section "Building in Gnu platforms".</p>
|
||||||
|
|
||||||
<hr>
|
<hr>
|
||||||
<h2>3. About implementation &amp; Usage tips</h2> <h3>3.1. Supported sample data formats</h3>
|
<h2>3. About implementation &amp; Usage tips</h2> <h3>3.1. Supported sample data formats</h3>
|
||||||
<p>The sample data format can be chosen between 16bit signed integer
|
<p>The sample data format can be chosen between 16bit signed integer
|
||||||
and 32bit floating point values. The default is 32bit floating point format,
|
and 32bit floating point values.</p>
|
||||||
which will also provide slightly better sound quality over the integer format. </p>
|
<p> The default sample type is 32bit floating point format,
|
||||||
|
which also provides better sound quality than integer format because
|
||||||
|
integer algorithms need to scale already intermediate calculation results to
|
||||||
|
avoid integer overflows. These early integer scalings can slightly degrade
|
||||||
|
output quality.</p>
|
||||||
<p> In Windows environment, the sample data format is chosen in file
|
<p> In Windows environment, the sample data format is chosen in file
|
||||||
"STTypes.h" by choosing one of the following defines:</p>
|
"STTypes.h" by choosing one of the following defines:</p>
|
||||||
<ul>
|
<ul>
|
||||||
<li> <span style="font-weight: bold;">#define
|
<li> <span style="font-weight: bold;">#define
|
||||||
SOUNDTOUCH_INTEGER_SAMPLES</span> for 16bit signed integer</li>
|
SOUNDTOUCH_INTEGER_SAMPLES</span> for 16bit signed integer</li>
|
||||||
<li> <span style="font-weight: bold;">#define </span><span
|
<li> <span style="font-weight: bold;">#define </span><span style="font-weight: bold;">SOUNDTOUCH_</span><span
|
||||||
style="font-weight: bold;">SOUNDTOUCH_</span><span
|
|
||||||
style="font-weight: bold;">FLOAT_SAMPLES</span> for 32bit floating
|
style="font-weight: bold;">FLOAT_SAMPLES</span> for 32bit floating
|
||||||
point</li>
|
point</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
@ -301,7 +299,8 @@ ends of the consecutive sequences will overlap with each other.<br>
|
||||||
<br>
|
<br>
|
||||||
This shouldn't be that critical parameter. If you reduce the
|
This shouldn't be that critical parameter. If you reduce the
|
||||||
DEFAULT_SEQUENCE_MS setting by a large amount, you might wish to try a
|
DEFAULT_SEQUENCE_MS setting by a large amount, you might wish to try a
|
||||||
smaller value on this.</li>
|
smaller value on this.
|
||||||
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>Notice that these parameters can also be set during execution time
|
<p>Notice that these parameters can also be set during execution time
|
||||||
with functions "<strong>TDStretch::setParameters()</strong>" and "<strong>SoundTouch::setSetting()</strong>".</p>
|
with functions "<strong>TDStretch::setParameters()</strong>" and "<strong>SoundTouch::setSetting()</strong>".</p>
|
||||||
|
@ -359,6 +358,29 @@ computation burden</td>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
<h3>3.5 Performance Optimizations </h3>
|
<h3>3.5 Performance Optimizations </h3>
|
||||||
|
<p><strong>Integer vs floating point:</strong></p>
|
||||||
|
<p>Floating point sample type is generally recommended because it provides
|
||||||
|
better sound quality.</p>
|
||||||
|
|
||||||
|
<p>However, execution speed difference between integer and floating point processing
|
||||||
|
depends on the CPU architecture. As a rule of thumb,
|
||||||
|
<ul>
|
||||||
|
<li>in 32-bit x86 floating point and integer are roughly equally fast</li>
|
||||||
|
<li>in 64-bit x86/x64 floating point can be significantly faster than integer
|
||||||
|
version, because MMX integer optimizations are not available in the x64 architecture.
|
||||||
|
That depends on the compiler however, so that gcc can autovectorize integer routines
|
||||||
|
to work equally fast as floating point, whereas Visual C++ (2017) does not
|
||||||
|
perform equally well and produces integer code that runs some 3x slower than
|
||||||
|
SSE-optimized floating point code.
|
||||||
|
</li>
|
||||||
|
<li>in ARMv7 integer routines are twice as fast as floating point. Their
|
||||||
|
relative difference is roughly the same both with and without NEON; NEON
|
||||||
|
vfpu can however bring 2.4x speed improvement.
|
||||||
|
</li>
|
||||||
|
<li>in other platforms: try out if the execution time performance makes a
|
||||||
|
big difference</li>
|
||||||
|
</ul>
|
||||||
|
</p>
|
||||||
<p><strong>General optimizations:</strong></p>
|
<p><strong>General optimizations:</strong></p>
|
||||||
<p>The time-stretch routine has a 'quick' mode that substantially
|
<p>The time-stretch routine has a 'quick' mode that substantially
|
||||||
speeds up the algorithm but may slightly compromise the sound quality.
|
speeds up the algorithm but may slightly compromise the sound quality.
|
||||||
|
@ -373,11 +395,15 @@ function with parameter id of SETTING_USE_QUICKSEEK and value
|
||||||
intrinsics, providing about a 3x processing speedup for x86 compatible
|
intrinsics, providing about a 3x processing speedup for x86 compatible
|
||||||
processors vs. non-SIMD implementation:</p>
|
processors vs. non-SIMD implementation:</p>
|
||||||
<ul>
|
<ul>
|
||||||
<li> Intel MMX optimized routines are used with x86 CPUs when 16bit integer
|
<li> MMX optimized routines are used in 32-bit x86 build when 16bit integer
|
||||||
sample type is used</li>
|
sample type is used</li>
|
||||||
<li> Intel SSE optimized routines are used with x86 CPUs when 32bit floating
|
<li> SSE optimized routines are used in 32- and 64-bit x86 CPUs when 32bit
|
||||||
point sample type is used</li>
|
floating point sample type is used</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
<p>The algorithms are tuned to utilize autovectorization efficiently
|
||||||
|
also in other CPU architectures, for example ARM cpus see approx 2.4x processing
|
||||||
|
speedup when NEON SIMD support is present.
|
||||||
|
</p>
|
||||||
<h3>3.6 OpenMP parallel computation</h3>
|
<h3>3.6 OpenMP parallel computation</h3>
|
||||||
<p>SoundTouch 1.9 onwards support running the algorithms parallel in several CPU
|
<p>SoundTouch 1.9 onwards support running the algorithms parallel in several CPU
|
||||||
cores. Based on benchmark the experienced multi-core processing speed-up gain
|
cores. Based on benchmark the experienced multi-core processing speed-up gain
|
||||||
|
@ -385,7 +411,8 @@ ranges between +30% (on a high-spec dual-core x86 Windows PC) to 215% (on a mode
|
||||||
quad-core ARM of Raspberry Pi2). </p>
|
quad-core ARM of Raspberry Pi2). </p>
|
||||||
<p>See an external blog article with more detailed discussion about the
|
<p>See an external blog article with more detailed discussion about the
|
||||||
<a href="http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices/">
|
<a href="http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices/">
|
||||||
SoundTouch OpenMP optimization</a>.</p>
|
SoundTouch OpenMP optimization</a>.
|
||||||
|
</p>
|
||||||
<p>The parallel computing support is implemented using OpenMP spec 3.0
|
<p>The parallel computing support is implemented using OpenMP spec 3.0
|
||||||
instructions. These instructions are supported by Visual C++ 2008 and later, and
|
instructions. These instructions are supported by Visual C++ 2008 and later, and
|
||||||
GCC v4.2 and later. Compilers that do not support OpenMP will ignore these
|
GCC v4.2 and later. Compilers that do not support OpenMP will ignore these
|
||||||
|
@ -409,7 +436,8 @@ library as follows:</p>
|
||||||
</strong>settings. Set
|
</strong>settings. Set
|
||||||
there "<strong>OpenMP support</strong>" to "<strong>Yes</strong>". Alternatively add
|
there "<strong>OpenMP support</strong>" to "<strong>Yes</strong>". Alternatively add
|
||||||
<strong>/openmp</strong> switch to command-line
|
<strong>/openmp</strong> switch to command-line
|
||||||
parameters</li>
|
parameters
|
||||||
|
</li>
|
||||||
<li><strong>GNU</strong>: Run the configure script with "<strong>./configure
|
<li><strong>GNU</strong>: Run the configure script with "<strong>./configure
|
||||||
--enable-openmp</strong>" switch, then run make as usual</li>
|
--enable-openmp</strong>" switch, then run make as usual</li>
|
||||||
<li><strong>Android</strong>: Add "<strong>-fopenmp</strong>" switches to compiler & linker
|
<li><strong>Android</strong>: Add "<strong>-fopenmp</strong>" switches to compiler & linker
|
||||||
|
@ -575,13 +603,49 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||||
<hr>
|
<hr>
|
||||||
<h2>5. Change History</h2>
|
<h2>5. Change History</h2>
|
||||||
<h3>5.1. SoundTouch library Change History </h3>
|
<h3>5.1. SoundTouch library Change History </h3>
|
||||||
|
<p><b>2.3.1:</b></p>
|
||||||
|
<ul>
|
||||||
|
<li>Adjusted cmake build settings and header files that cmake installs</li>
|
||||||
|
</ul>
|
||||||
|
<p><b>2.3.0:</b></p>
|
||||||
|
<ul>
|
||||||
|
<li>Disable setting "SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION" by default. The original
|
||||||
|
purpose of this setting was to avoid performance penalty due to unaligned SIMD memory
|
||||||
|
accesses in old CPUs, but that is no longer an issue in contemporary CPU SIMD implementations
|
||||||
|
and having this setting enabled can cause slight compromise in result quality.
|
||||||
|
</li>
|
||||||
|
<li>Bugfix: soundtouch.clear() to really clear whole processing pipeline state. Earlier
|
||||||
|
individual variables were left uncleared, which caused slightly different result if
|
||||||
|
the same audio stream were processed again after calling clear().
|
||||||
|
</li>
|
||||||
|
<li>Bugfix: TDstretch to align initial offset position to be in middle of correlation search
|
||||||
|
window. This ensures that with zero tempo change the output will be same as input.
|
||||||
|
</li>
|
||||||
|
<li>Bugfix: Fix a bug in TDStretch with too small initial skipFract value that occurred
|
||||||
|
with certain processing parameter settings: Replace assert with assignment that
|
||||||
|
corrects the situation.
|
||||||
|
</li>
|
||||||
|
<li>Remove OpenMP "_init_threading" workaround from Android build as it's not needed with current
|
||||||
|
Android SDKs any more.</li>
|
||||||
|
</ul>
|
||||||
|
<p><b>2.2:</b></p>
|
||||||
|
<ul>
|
||||||
|
<li>Improved source codes so that compiler can autovectorize them more effectively.
|
||||||
|
This brings remarkable improvement e.g. on ARM CPUs equipped with NEON vfpu: Benchmarked
|
||||||
|
2.4x improvement in execution speed in ARMv7l vs the previous SoundTouch version
|
||||||
|
for both integer and floating point sample types.
|
||||||
|
</li>
|
||||||
|
<li>Bugfix: Resolved bad sound quality when using integer sample types in non-x86 CPU</li>
|
||||||
|
<li>Bugfix: Fixed possible reading past end of array in BPM peak detection algorithm</li>
|
||||||
|
</ul>
|
||||||
<p><b>2.1.2:</b></p>
|
<p><b>2.1.2:</b></p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Bump version to 2.1.2 also in configure.ac. The earlier release had old version info for GNU autotools.</li>
|
<li>Bump version to 2.1.2 also in configure.ac. The earlier release had old version info for GNU autotools.</li>
|
||||||
</ul>
|
</ul>
|
||||||
<p><b>2.1.1:</b></p>
|
<p><b>2.1.1:</b></p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Bugfixes: Fixed potential buffer overwrite bugs in WavFile routines. Replaced asserts with runtime exceptions.</li>
|
<li>Bugfixes: Fixed potential buffer overwrite bugs in WavFile routines. Replaced asserts with runtime exceptions.
|
||||||
|
</li>
|
||||||
<li>Android: Migrated the SoundTouch Android example to new Android Studio</li>
|
<li>Android: Migrated the SoundTouch Android example to new Android Studio</li>
|
||||||
<li>Automake: unset ACLOCAL in bootstrap script in case earlier build script has set it</li>
|
<li>Automake: unset ACLOCAL in bootstrap script in case earlier build script has set it</li>
|
||||||
|
|
||||||
|
@ -602,11 +666,13 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||||
</ul>
|
</ul>
|
||||||
<p><b>2.0:</b></p>
|
<p><b>2.0:</b></p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Added functions to get initial processing latency, duration ratio between the original input and processed output tracks, and clarified reporting of input/output batch sizes</li>
|
<li>Added functions to get initial processing latency, duration ratio between the original input and processed
|
||||||
|
output tracks, and clarified reporting of input/output batch sizes</li>
|
||||||
<li>Fixed issue that added brief sequence of silence to beginning of output audio</li>
|
<li>Fixed issue that added brief sequence of silence to beginning of output audio</li>
|
||||||
<li>Adjusted algorithm parameters to reduce reverberating effect at tempo slowdown</li>
|
<li>Adjusted algorithm parameters to reduce reverberating effect at tempo slowdown</li>
|
||||||
<li>Bugfix: Fixed a glitch that could cause negative array indexing in quick seek algorithm</li>
|
<li>Bugfix: Fixed a glitch that could cause negative array indexing in quick seek algorithm</li>
|
||||||
<li>Bugfix: flush() didn't properly flush final samples from the pipeline on 2nd time in case that soundtouch object instance was recycled and used for processing a second audio stream.</li>
|
<li>Bugfix: flush() didn't properly flush final samples from the pipeline on 2nd time in case that soundtouch
|
||||||
|
object instance was recycled and used for processing a second audio stream.</li>
|
||||||
<li>Bugfix: Pi value had incorrect 9th/10th decimals</li>
|
<li>Bugfix: Pi value had incorrect 9th/10th decimals</li>
|
||||||
<li>Added C# example application that uses SoundTouch dll library for processing MP3 files</li>
|
<li>Added C# example application that uses SoundTouch dll library for processing MP3 files</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
@ -616,8 +682,10 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||||
</ul>
|
</ul>
|
||||||
<p><b>1.9.1:</b></p>
|
<p><b>1.9.1:</b></p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Improved SoundTouch::flush() function so that it returns precisely the desired amount of samples for exact output duration control</li>
|
<li>Improved SoundTouch::flush() function so that it returns precisely the desired amount of samples for exact
|
||||||
<li>Redesigned quickseek algorithm for improved sound quality when using the quickseek mode. The new quickseek algorithm can find 99% as good results as the
|
output duration control</li>
|
||||||
|
<li>Redesigned quickseek algorithm for improved sound quality when using the quickseek mode. The new quickseek
|
||||||
|
algorithm can find 99% as good results as the
|
||||||
default full-scan mode, while the quickseek algorithm is remarkably less
|
default full-scan mode, while the quickseek algorithm is remarkably less
|
||||||
CPU intensive.</li>
|
CPU intensive.</li>
|
||||||
<li>Added adaptive integer divider scaling for improved sound quality when using integer processing algorithm
|
<li>Added adaptive integer divider scaling for improved sound quality when using integer processing algorithm
|
||||||
|
@ -625,7 +693,8 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||||
</ul>
|
</ul>
|
||||||
<p><b>1.9:</b></p>
|
<p><b>1.9:</b></p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>Added support for parallel computation support via OpenMP primitives for better performance in multicore systems.
|
<li>Added support for parallel computation support via OpenMP primitives for better performance in multicore
|
||||||
|
systems.
|
||||||
Benchmarks show that achieved parallel processing speedup improvement
|
Benchmarks show that achieved parallel processing speedup improvement
|
||||||
typically range from +30% (x86 dual-core) to +180% (ARM quad-core). The
|
typically range from +30% (x86 dual-core) to +180% (ARM quad-core). The
|
||||||
OpenMP optimizations are disabled by default, see OpenMP notes above in this
|
OpenMP optimizations are disabled by default, see OpenMP notes above in this
|
||||||
|
@ -869,8 +938,10 @@ submitted bugfixes:</p>
|
||||||
<li> Jamie Bullock</li>
|
<li> Jamie Bullock</li>
|
||||||
<li> Chris Bryan</li>
|
<li> Chris Bryan</li>
|
||||||
<li> Jacek Caban</li>
|
<li> Jacek Caban</li>
|
||||||
|
<li> Marketa Calabkova</li>
|
||||||
<li> Brian Cameron</li>
|
<li> Brian Cameron</li>
|
||||||
<li> Jason Champion</li>
|
<li> Jason Champion</li>
|
||||||
|
<li> Giuseppe Cigala</li>
|
||||||
<li> David Clark</li>
|
<li> David Clark</li>
|
||||||
<li> Patrick Colis</li>
|
<li> Patrick Colis</li>
|
||||||
<li> Miquel Colon</li>
|
<li> Miquel Colon</li>
|
||||||
|
@ -901,6 +972,7 @@ submitted bugfixes:</p>
|
||||||
<li> Tyson Smith</li>
|
<li> Tyson Smith</li>
|
||||||
<li> John Stumpo</li>
|
<li> John Stumpo</li>
|
||||||
<li> Mario di Vece</li>
|
<li> Mario di Vece</li>
|
||||||
|
<li> Rémi Verschelde</li>
|
||||||
<li> Katja Vetter</li>
|
<li> Katja Vetter</li>
|
||||||
<li> Wu Q.</li>
|
<li> Wu Q.</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
@ -922,6 +994,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p>
|
||||||
<p>---</p>
|
<p>---</p>
|
||||||
<p>commercial license alternative also available, contact author for details.</p>
|
<p>commercial license alternative also available, contact author for details.</p>
|
||||||
<hr>
|
<hr>
|
||||||
<p><i>README.html file updated in November-2018</i></p>
|
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
|
@ -170,6 +170,9 @@ public:
|
||||||
/// allow trimming (downwards) amount of samples in pipeline.
|
/// allow trimming (downwards) amount of samples in pipeline.
|
||||||
/// Returns adjusted amount of samples
|
/// Returns adjusted amount of samples
|
||||||
uint adjustAmountOfSamples(uint numSamples);
|
uint adjustAmountOfSamples(uint numSamples);
|
||||||
|
|
||||||
|
/// Add silence to end of buffer
|
||||||
|
void addSilent(uint nSamples);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -121,10 +121,10 @@ namespace soundtouch
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// If defined, allows the SIMD-optimized routines to take minor shortcuts
|
// If defined, allows the SIMD-optimized routines to skip unevenly aligned
|
||||||
// for improved performance. Undefine to require faithfully similar SIMD
|
// memory offsets that can cause performance penalty in some SIMD implementations.
|
||||||
// calculations as in normal C implementation.
|
// Causes slight compromise in sound quality.
|
||||||
#define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION 1
|
// #define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION 1
|
||||||
|
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
|
@ -149,8 +149,9 @@ namespace soundtouch
|
||||||
|
|
||||||
// floating point samples
|
// floating point samples
|
||||||
typedef float SAMPLETYPE;
|
typedef float SAMPLETYPE;
|
||||||
// data type for sample accumulation: Use double to utilize full precision.
|
// data type for sample accumulation: Use float also here to enable
|
||||||
typedef double LONG_SAMPLETYPE;
|
// efficient autovectorization
|
||||||
|
typedef float LONG_SAMPLETYPE;
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
||||||
// Allow SSE optimizations
|
// Allow SSE optimizations
|
||||||
|
@ -159,7 +160,13 @@ namespace soundtouch
|
||||||
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
|
|
||||||
};
|
#if ((SOUNDTOUCH_ALLOW_SSE) || (__SSE__) || (SOUNDTOUCH_USE_NEON))
|
||||||
|
#if SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
||||||
|
#define ST_SIMD_AVOID_UNALIGNED
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
|
// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
|
||||||
// #define ST_NO_EXCEPTION_HANDLING 1
|
// #define ST_NO_EXCEPTION_HANDLING 1
|
||||||
|
|
|
@ -72,10 +72,10 @@ namespace soundtouch
|
||||||
{
|
{
|
||||||
|
|
||||||
/// Soundtouch library version string
|
/// Soundtouch library version string
|
||||||
#define SOUNDTOUCH_VERSION "2.1.2"
|
#define SOUNDTOUCH_VERSION "2.3.1"
|
||||||
|
|
||||||
/// SoundTouch library version id
|
/// SoundTouch library version id
|
||||||
#define SOUNDTOUCH_VERSION_ID (20102)
|
#define SOUNDTOUCH_VERSION_ID (20301)
|
||||||
|
|
||||||
//
|
//
|
||||||
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
|
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
|
||||||
|
|
|
@ -313,7 +313,7 @@ void BPMDetect::updateXCorr(int process_samples)
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (offs = windowStart; offs < windowLen; offs ++)
|
for (offs = windowStart; offs < windowLen; offs ++)
|
||||||
{
|
{
|
||||||
double sum;
|
float sum;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
@ -341,7 +341,6 @@ void BPMDetect::updateBeatPos(int process_samples)
|
||||||
// static double thr = 0.0003;
|
// static double thr = 0.0003;
|
||||||
double posScale = (double)this->decimateBy / (double)this->sampleRate;
|
double posScale = (double)this->decimateBy / (double)this->sampleRate;
|
||||||
int resetDur = (int)(0.12 / posScale + 0.5);
|
int resetDur = (int)(0.12 / posScale + 0.5);
|
||||||
double corrScale = 1.0 / (double)(windowLen - windowStart);
|
|
||||||
|
|
||||||
// prescale pbuffer
|
// prescale pbuffer
|
||||||
float tmp[XCORR_UPDATE_SEQUENCE / 2];
|
float tmp[XCORR_UPDATE_SEQUENCE / 2];
|
||||||
|
@ -353,7 +352,7 @@ void BPMDetect::updateBeatPos(int process_samples)
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int offs = windowStart; offs < windowLen; offs++)
|
for (int offs = windowStart; offs < windowLen; offs++)
|
||||||
{
|
{
|
||||||
double sum = 0;
|
float sum = 0;
|
||||||
for (int i = 0; i < process_samples; i++)
|
for (int i = 0; i < process_samples; i++)
|
||||||
{
|
{
|
||||||
sum += tmp[i] * pBuffer[offs + i];
|
sum += tmp[i] * pBuffer[offs + i];
|
||||||
|
@ -562,7 +561,7 @@ float BPMDetect::getBpm()
|
||||||
/// \return number of beats in the arrays.
|
/// \return number of beats in the arrays.
|
||||||
int BPMDetect::getBeats(float *pos, float *values, int max_num)
|
int BPMDetect::getBeats(float *pos, float *values, int max_num)
|
||||||
{
|
{
|
||||||
int num = beats.size();
|
int num = (int)beats.size();
|
||||||
if ((!pos) || (!values)) return num; // pos or values NULL, return just size
|
if ((!pos) || (!values)) return num; // pos or values NULL, return just size
|
||||||
|
|
||||||
for (int i = 0; (i < num) && (i < max_num); i++)
|
for (int i = 0; (i < num) && (i < max_num); i++)
|
||||||
|
|
|
@ -265,3 +265,11 @@ uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples)
|
||||||
}
|
}
|
||||||
return samplesInBuffer;
|
return samplesInBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Add silence to end of buffer
|
||||||
|
void FIFOSampleBuffer::addSilent(uint nSamples)
|
||||||
|
{
|
||||||
|
memset(ptrEnd(nSamples), 0, sizeof(SAMPLETYPE) * nSamples * channels);
|
||||||
|
samplesInBuffer += nSamples;
|
||||||
|
}
|
||||||
|
|
|
@ -60,12 +60,14 @@ FIRFilter::FIRFilter()
|
||||||
length = 0;
|
length = 0;
|
||||||
lengthDiv8 = 0;
|
lengthDiv8 = 0;
|
||||||
filterCoeffs = NULL;
|
filterCoeffs = NULL;
|
||||||
|
filterCoeffsStereo = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
FIRFilter::~FIRFilter()
|
FIRFilter::~FIRFilter()
|
||||||
{
|
{
|
||||||
delete[] filterCoeffs;
|
delete[] filterCoeffs;
|
||||||
|
delete[] filterCoeffsStereo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -78,35 +80,26 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
||||||
// because division is much slower operation than multiplying.
|
// because division is much slower operation than multiplying.
|
||||||
double dScaler = 1.0 / (double)resultDivider;
|
double dScaler = 1.0 / (double)resultDivider;
|
||||||
#endif
|
#endif
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
assert(length != 0);
|
assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL));
|
||||||
assert(src != NULL);
|
|
||||||
assert(dest != NULL);
|
|
||||||
assert(filterCoeffs != NULL);
|
|
||||||
|
|
||||||
end = 2 * (numSamples - length);
|
end = 2 * (numSamples - ilength);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j += 2)
|
for (j = 0; j < end; j += 2)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *ptr;
|
const SAMPLETYPE *ptr;
|
||||||
LONG_SAMPLETYPE suml, sumr;
|
LONG_SAMPLETYPE suml, sumr;
|
||||||
uint i;
|
|
||||||
|
|
||||||
suml = sumr = 0;
|
suml = sumr = 0;
|
||||||
ptr = src + j;
|
ptr = src + j;
|
||||||
|
|
||||||
for (i = 0; i < length; i += 4)
|
for (int i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
// loop is unrolled by factor of 4 here for efficiency
|
suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
|
||||||
suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
|
sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
|
||||||
ptr[2 * i + 2] * filterCoeffs[i + 1] +
|
|
||||||
ptr[2 * i + 4] * filterCoeffs[i + 2] +
|
|
||||||
ptr[2 * i + 6] * filterCoeffs[i + 3];
|
|
||||||
sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
|
|
||||||
ptr[2 * i + 3] * filterCoeffs[i + 1] +
|
|
||||||
ptr[2 * i + 5] * filterCoeffs[i + 2] +
|
|
||||||
ptr[2 * i + 7] * filterCoeffs[i + 3];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
|
@ -116,14 +109,11 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
||||||
suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
|
suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
|
||||||
// saturate to 16 bit integer limits
|
// saturate to 16 bit integer limits
|
||||||
sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
|
sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
|
||||||
#else
|
|
||||||
suml *= dScaler;
|
|
||||||
sumr *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j] = (SAMPLETYPE)suml;
|
dest[j] = (SAMPLETYPE)suml;
|
||||||
dest[j + 1] = (SAMPLETYPE)sumr;
|
dest[j + 1] = (SAMPLETYPE)sumr;
|
||||||
}
|
}
|
||||||
return numSamples - length;
|
return numSamples - ilength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -137,31 +127,28 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
|
||||||
double dScaler = 1.0 / (double)resultDivider;
|
double dScaler = 1.0 / (double)resultDivider;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(length != 0);
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
end = numSamples - length;
|
assert(ilength != 0);
|
||||||
|
|
||||||
|
end = numSamples - ilength;
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j ++)
|
for (j = 0; j < end; j ++)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *pSrc = src + j;
|
const SAMPLETYPE *pSrc = src + j;
|
||||||
LONG_SAMPLETYPE sum;
|
LONG_SAMPLETYPE sum;
|
||||||
uint i;
|
int i;
|
||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
for (i = 0; i < length; i += 4)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
// loop is unrolled by factor of 4 here for efficiency
|
sum += pSrc[i] * filterCoeffs[i];
|
||||||
sum += pSrc[i + 0] * filterCoeffs[i + 0] +
|
|
||||||
pSrc[i + 1] * filterCoeffs[i + 1] +
|
|
||||||
pSrc[i + 2] * filterCoeffs[i + 2] +
|
|
||||||
pSrc[i + 3] * filterCoeffs[i + 3];
|
|
||||||
}
|
}
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
sum >>= resultDivFactor;
|
sum >>= resultDivFactor;
|
||||||
// saturate to 16 bit integer limits
|
// saturate to 16 bit integer limits
|
||||||
sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
|
sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
|
||||||
#else
|
|
||||||
sum *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j] = (SAMPLETYPE)sum;
|
dest[j] = (SAMPLETYPE)sum;
|
||||||
}
|
}
|
||||||
|
@ -185,14 +172,18 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
||||||
assert(filterCoeffs != NULL);
|
assert(filterCoeffs != NULL);
|
||||||
assert(numChannels < 16);
|
assert(numChannels < 16);
|
||||||
|
|
||||||
end = numChannels * (numSamples - length);
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = length & -8;
|
||||||
|
|
||||||
|
end = numChannels * (numSamples - ilength);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (j = 0; j < end; j += numChannels)
|
for (j = 0; j < end; j += numChannels)
|
||||||
{
|
{
|
||||||
const SAMPLETYPE *ptr;
|
const SAMPLETYPE *ptr;
|
||||||
LONG_SAMPLETYPE sums[16];
|
LONG_SAMPLETYPE sums[16];
|
||||||
uint c, i;
|
uint c;
|
||||||
|
int i;
|
||||||
|
|
||||||
for (c = 0; c < numChannels; c ++)
|
for (c = 0; c < numChannels; c ++)
|
||||||
{
|
{
|
||||||
|
@ -201,7 +192,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
||||||
|
|
||||||
ptr = src + j;
|
ptr = src + j;
|
||||||
|
|
||||||
for (i = 0; i < length; i ++)
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
SAMPLETYPE coef=filterCoeffs[i];
|
SAMPLETYPE coef=filterCoeffs[i];
|
||||||
for (c = 0; c < numChannels; c ++)
|
for (c = 0; c < numChannels; c ++)
|
||||||
|
@ -215,13 +206,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
||||||
{
|
{
|
||||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
sums[c] >>= resultDivFactor;
|
sums[c] >>= resultDivFactor;
|
||||||
#else
|
|
||||||
sums[c] *= dScaler;
|
|
||||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||||
dest[j+c] = (SAMPLETYPE)sums[c];
|
dest[j+c] = (SAMPLETYPE)sums[c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return numSamples - length;
|
return numSamples - ilength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -233,6 +222,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
||||||
assert(newLength > 0);
|
assert(newLength > 0);
|
||||||
if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
|
if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
|
||||||
|
|
||||||
|
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
||||||
|
// scale coefficients already here if using floating samples
|
||||||
|
double scale = 1.0 / resultDivider;
|
||||||
|
#else
|
||||||
|
short scale = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
lengthDiv8 = newLength / 8;
|
lengthDiv8 = newLength / 8;
|
||||||
length = lengthDiv8 * 8;
|
length = lengthDiv8 * 8;
|
||||||
assert(length == newLength);
|
assert(length == newLength);
|
||||||
|
@ -242,7 +238,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
||||||
|
|
||||||
delete[] filterCoeffs;
|
delete[] filterCoeffs;
|
||||||
filterCoeffs = new SAMPLETYPE[length];
|
filterCoeffs = new SAMPLETYPE[length];
|
||||||
memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
|
delete[] filterCoeffsStereo;
|
||||||
|
filterCoeffsStereo = new SAMPLETYPE[length*2];
|
||||||
|
for (uint i = 0; i < length; i ++)
|
||||||
|
{
|
||||||
|
filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
// create also stereo set of filter coefficients: this allows compiler
|
||||||
|
// to autovectorize filter evaluation much more efficiently
|
||||||
|
filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@ protected:
|
||||||
|
|
||||||
// Memory for filter coefficients
|
// Memory for filter coefficients
|
||||||
SAMPLETYPE *filterCoeffs;
|
SAMPLETYPE *filterCoeffs;
|
||||||
|
SAMPLETYPE *filterCoeffsStereo;
|
||||||
|
|
||||||
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
|
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
|
|
|
@ -41,7 +41,6 @@ namespace soundtouch
|
||||||
class InterpolateCubic : public TransposerBase
|
class InterpolateCubic : public TransposerBase
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
virtual void resetRegisters();
|
|
||||||
virtual int transposeMono(SAMPLETYPE *dest,
|
virtual int transposeMono(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
int &srcSamples);
|
int &srcSamples);
|
||||||
|
@ -56,6 +55,13 @@ protected:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
InterpolateCubic();
|
InterpolateCubic();
|
||||||
|
|
||||||
|
virtual void resetRegisters();
|
||||||
|
|
||||||
|
int getLatency() const
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -142,7 +142,7 @@ int InterpolateLinearInteger::transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE
|
||||||
LONG_SAMPLETYPE temp, vol1;
|
LONG_SAMPLETYPE temp, vol1;
|
||||||
|
|
||||||
assert(iFract < SCALE);
|
assert(iFract < SCALE);
|
||||||
vol1 = (SCALE - iFract);
|
vol1 = (LONG_SAMPLETYPE)(SCALE - iFract);
|
||||||
for (int c = 0; c < numChannels; c ++)
|
for (int c = 0; c < numChannels; c ++)
|
||||||
{
|
{
|
||||||
temp = vol1 * src[c] + iFract * src[c + numChannels];
|
temp = vol1 * src[c] + iFract * src[c + numChannels];
|
||||||
|
|
|
@ -45,8 +45,6 @@ protected:
|
||||||
int iFract;
|
int iFract;
|
||||||
int iRate;
|
int iRate;
|
||||||
|
|
||||||
virtual void resetRegisters();
|
|
||||||
|
|
||||||
virtual int transposeMono(SAMPLETYPE *dest,
|
virtual int transposeMono(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
int &srcSamples);
|
int &srcSamples);
|
||||||
|
@ -60,6 +58,13 @@ public:
|
||||||
/// Sets new target rate. Normal rate = 1.0, smaller values represent slower
|
/// Sets new target rate. Normal rate = 1.0, smaller values represent slower
|
||||||
/// rate, larger faster rates.
|
/// rate, larger faster rates.
|
||||||
virtual void setRate(double newRate);
|
virtual void setRate(double newRate);
|
||||||
|
|
||||||
|
virtual void resetRegisters();
|
||||||
|
|
||||||
|
int getLatency() const
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,8 +74,6 @@ class InterpolateLinearFloat : public TransposerBase
|
||||||
protected:
|
protected:
|
||||||
double fract;
|
double fract;
|
||||||
|
|
||||||
virtual void resetRegisters();
|
|
||||||
|
|
||||||
virtual int transposeMono(SAMPLETYPE *dest,
|
virtual int transposeMono(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
int &srcSamples);
|
int &srcSamples);
|
||||||
|
@ -81,6 +84,13 @@ protected:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
InterpolateLinearFloat();
|
InterpolateLinearFloat();
|
||||||
|
|
||||||
|
virtual void resetRegisters();
|
||||||
|
|
||||||
|
int getLatency() const
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ namespace soundtouch
|
||||||
class InterpolateShannon : public TransposerBase
|
class InterpolateShannon : public TransposerBase
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
void resetRegisters();
|
|
||||||
int transposeMono(SAMPLETYPE *dest,
|
int transposeMono(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
int &srcSamples);
|
int &srcSamples);
|
||||||
|
@ -61,6 +60,13 @@ protected:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
InterpolateShannon();
|
InterpolateShannon();
|
||||||
|
|
||||||
|
void resetRegisters();
|
||||||
|
|
||||||
|
int getLatency() const
|
||||||
|
{
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ int PeakFinder::findTop(const float *data, int peakpos) const
|
||||||
|
|
||||||
refvalue = data[peakpos];
|
refvalue = data[peakpos];
|
||||||
|
|
||||||
// seek within <EFBFBD>10 points
|
// seek within ±10 points
|
||||||
start = peakpos - 10;
|
start = peakpos - 10;
|
||||||
if (start < minPos) start = minPos;
|
if (start < minPos) start = minPos;
|
||||||
end = peakpos + 10;
|
end = peakpos + 10;
|
||||||
|
@ -142,7 +142,7 @@ int PeakFinder::findCrossingLevel(const float *data, float level, int peakpos, i
|
||||||
peaklevel = data[peakpos];
|
peaklevel = data[peakpos];
|
||||||
assert(peaklevel >= level);
|
assert(peaklevel >= level);
|
||||||
pos = peakpos;
|
pos = peakpos;
|
||||||
while ((pos >= minPos) && (pos < maxPos))
|
while ((pos >= minPos) && (pos + direction < maxPos))
|
||||||
{
|
{
|
||||||
if (data[pos + direction] < level) return pos; // crossing found
|
if (data[pos + direction] < level) return pos; // crossing found
|
||||||
pos += direction;
|
pos += direction;
|
||||||
|
@ -256,7 +256,7 @@ double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
|
||||||
|
|
||||||
// accept harmonic peak if
|
// accept harmonic peak if
|
||||||
// (a) it is found
|
// (a) it is found
|
||||||
// (b) is within <EFBFBD>4% of the expected harmonic interval
|
// (b) is within ±4% of the expected harmonic interval
|
||||||
// (c) has at least half x-corr value of the max. peak
|
// (c) has at least half x-corr value of the max. peak
|
||||||
|
|
||||||
double diff = harmonic * peaktmp / highPeak;
|
double diff = harmonic * peaktmp / highPeak;
|
||||||
|
|
|
@ -61,6 +61,7 @@ RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
|
||||||
// Instantiates the anti-alias filter
|
// Instantiates the anti-alias filter
|
||||||
pAAFilter = new AAFilter(64);
|
pAAFilter = new AAFilter(64);
|
||||||
pTransposer = TransposerBase::newInstance();
|
pTransposer = TransposerBase::newInstance();
|
||||||
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,6 +78,7 @@ void RateTransposer::enableAAFilter(bool newMode)
|
||||||
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
|
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
|
||||||
// Disable Anti-alias filter if desirable to avoid click at rate change zero value crossover
|
// Disable Anti-alias filter if desirable to avoid click at rate change zero value crossover
|
||||||
bUseAAFilter = newMode;
|
bUseAAFilter = newMode;
|
||||||
|
clear();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,6 +194,11 @@ void RateTransposer::clear()
|
||||||
outputBuffer.clear();
|
outputBuffer.clear();
|
||||||
midBuffer.clear();
|
midBuffer.clear();
|
||||||
inputBuffer.clear();
|
inputBuffer.clear();
|
||||||
|
pTransposer->resetRegisters();
|
||||||
|
|
||||||
|
// prefill buffer to avoid losing first samples at beginning of stream
|
||||||
|
int prefill = getLatency();
|
||||||
|
inputBuffer.addSilent(prefill);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -209,7 +216,8 @@ int RateTransposer::isEmpty() const
|
||||||
/// Return approximate initial input-output latency
|
/// Return approximate initial input-output latency
|
||||||
int RateTransposer::getLatency() const
|
int RateTransposer::getLatency() const
|
||||||
{
|
{
|
||||||
return (bUseAAFilter) ? pAAFilter->getLength() : 0;
|
return pTransposer->getLatency() +
|
||||||
|
((bUseAAFilter) ? (pAAFilter->getLength() / 2) : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -59,8 +59,6 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void resetRegisters() = 0;
|
|
||||||
|
|
||||||
virtual int transposeMono(SAMPLETYPE *dest,
|
virtual int transposeMono(SAMPLETYPE *dest,
|
||||||
const SAMPLETYPE *src,
|
const SAMPLETYPE *src,
|
||||||
int &srcSamples) = 0;
|
int &srcSamples) = 0;
|
||||||
|
@ -83,6 +81,9 @@ public:
|
||||||
virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src);
|
virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src);
|
||||||
virtual void setRate(double newRate);
|
virtual void setRate(double newRate);
|
||||||
virtual void setChannels(int channels);
|
virtual void setChannels(int channels);
|
||||||
|
virtual int getLatency() const = 0;
|
||||||
|
|
||||||
|
virtual void resetRegisters() = 0;
|
||||||
|
|
||||||
// static factory function
|
// static factory function
|
||||||
static TransposerBase *newInstance();
|
static TransposerBase *newInstance();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
///
|
///
|
||||||
/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
|
/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
|
||||||
/// while maintaining the original pitch by using a time domain WSOLA-like
|
/// while maintaining the original pitch by using a time domain WSOLA-like
|
||||||
|
@ -54,7 +54,6 @@ using namespace soundtouch;
|
||||||
|
|
||||||
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
*
|
*
|
||||||
* Constant definitions
|
* Constant definitions
|
||||||
|
@ -93,11 +92,6 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
|
||||||
bAutoSeqSetting = true;
|
bAutoSeqSetting = true;
|
||||||
bAutoSeekSetting = true;
|
bAutoSeekSetting = true;
|
||||||
|
|
||||||
maxnorm = 0;
|
|
||||||
maxnormf = 1e8;
|
|
||||||
|
|
||||||
skipFract = 0;
|
|
||||||
|
|
||||||
tempo = 1.0f;
|
tempo = 1.0f;
|
||||||
setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
|
setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
|
||||||
setTempo(1.0f);
|
setTempo(1.0f);
|
||||||
|
@ -224,6 +218,9 @@ void TDStretch::clearInput()
|
||||||
inputBuffer.clear();
|
inputBuffer.clear();
|
||||||
clearMidBuffer();
|
clearMidBuffer();
|
||||||
isBeginning = true;
|
isBeginning = true;
|
||||||
|
maxnorm = 0;
|
||||||
|
maxnormf = 1e8;
|
||||||
|
skipFract = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -315,9 +312,10 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
||||||
{
|
{
|
||||||
double corr;
|
double corr;
|
||||||
// Calculates correlation value for the mixing position corresponding to 'i'
|
// Calculates correlation value for the mixing position corresponding to 'i'
|
||||||
#ifdef _OPENMP
|
#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
|
||||||
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
|
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
|
||||||
// iterate the loop in sequential order
|
// iterate the loop in sequential order
|
||||||
|
// in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
|
||||||
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
|
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
|
||||||
#else
|
#else
|
||||||
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
|
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
|
||||||
|
@ -675,11 +673,10 @@ void TDStretch::processSamples()
|
||||||
// Adjust processing offset at beginning of track by not perform initial overlapping
|
// Adjust processing offset at beginning of track by not perform initial overlapping
|
||||||
// and compensating that in the 'input buffer skip' calculation
|
// and compensating that in the 'input buffer skip' calculation
|
||||||
isBeginning = false;
|
isBeginning = false;
|
||||||
int skip = (int)(tempo * overlapLength + 0.5);
|
int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
#ifdef SOUNDTOUCH_ALLOW_SSE
|
// in SIMD mode, round the skip amount to value corresponding to aligned memory address
|
||||||
// if SSE mode, round the skip amount to value corresponding to aligned memory address
|
|
||||||
if (channels == 1)
|
if (channels == 1)
|
||||||
{
|
{
|
||||||
skip &= -4;
|
skip &= -4;
|
||||||
|
@ -689,9 +686,11 @@ void TDStretch::processSamples()
|
||||||
skip &= -2;
|
skip &= -2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
skipFract -= skip;
|
skipFract -= skip;
|
||||||
assert(nominalSkip >= -skipFract);
|
if (skipFract <= -nominalSkip)
|
||||||
|
{
|
||||||
|
skipFract = -nominalSkip;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ... then copy sequence samples from 'inputBuffer' to output:
|
// ... then copy sequence samples from 'inputBuffer' to output:
|
||||||
|
@ -830,21 +829,19 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
||||||
|
|
||||||
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
|
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
|
||||||
// version of the routine.
|
// version of the routine.
|
||||||
void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
|
void TDStretch::overlapMulti(short *poutput, const short *input) const
|
||||||
{
|
{
|
||||||
SAMPLETYPE m1=(SAMPLETYPE)0;
|
short m1;
|
||||||
SAMPLETYPE m2;
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
|
for (m1 = 0; m1 < overlapLength; m1 ++)
|
||||||
{
|
{
|
||||||
|
short m2 = (short)(overlapLength - m1);
|
||||||
for (int c = 0; c < channels; c ++)
|
for (int c = 0; c < channels; c ++)
|
||||||
{
|
{
|
||||||
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
|
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
m1++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -889,20 +886,23 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
||||||
unsigned long lnorm;
|
unsigned long lnorm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
|
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||||
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
corr = lnorm = 0;
|
corr = lnorm = 0;
|
||||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
// Same routine for stereo and mono
|
||||||
// efficiency and gives slightly better resolution against rounding.
|
for (i = 0; i < ilength; i += 2)
|
||||||
// For mono it same routine, just unrolls loop by factor of 4
|
|
||||||
for (i = 0; i < channels * overlapLength; i += 4)
|
|
||||||
{
|
{
|
||||||
corr += (mixingPos[i] * compare[i] +
|
corr += (mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
|
||||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
|
||||||
lnorm += (mixingPos[i] * mixingPos[i] +
|
lnorm += (mixingPos[i] * mixingPos[i] +
|
||||||
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
|
||||||
lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
|
// do intermediate scalings to avoid integer overflow
|
||||||
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lnorm > maxnorm)
|
if (lnorm > maxnorm)
|
||||||
|
@ -925,9 +925,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
||||||
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
|
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
|
||||||
{
|
{
|
||||||
long corr;
|
long corr;
|
||||||
unsigned long lnorm;
|
long lnorm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
// cancel first normalizer tap from previous round
|
// cancel first normalizer tap from previous round
|
||||||
lnorm = 0;
|
lnorm = 0;
|
||||||
for (i = 1; i <= channels; i ++)
|
for (i = 1; i <= channels; i ++)
|
||||||
|
@ -936,15 +939,11 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
||||||
}
|
}
|
||||||
|
|
||||||
corr = 0;
|
corr = 0;
|
||||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
// Same routine for stereo and mono.
|
||||||
// efficiency and gives slightly better resolution against rounding.
|
for (i = 0; i < ilength; i += 2)
|
||||||
// For mono it same routine, just unrolls loop by factor of 4
|
|
||||||
for (i = 0; i < channels * overlapLength; i += 4)
|
|
||||||
{
|
{
|
||||||
corr += (mixingPos[i] * compare[i] +
|
corr += (mixingPos[i] * compare[i] +
|
||||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
|
||||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// update normalizer with last samples of this round
|
// update normalizer with last samples of this round
|
||||||
|
@ -1045,27 +1044,24 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
|
||||||
/// Calculate cross-correlation
|
/// Calculate cross-correlation
|
||||||
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
|
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
|
||||||
{
|
{
|
||||||
double corr;
|
float corr;
|
||||||
double norm;
|
float norm;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
|
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||||
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
|
int ilength = (channels * overlapLength) & -8;
|
||||||
|
|
||||||
corr = norm = 0;
|
corr = norm = 0;
|
||||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
// Same routine for stereo and mono
|
||||||
// For mono it's same routine yet unrollsd by factor of 4.
|
for (i = 0; i < ilength; i ++)
|
||||||
for (i = 0; i < channels * overlapLength; i += 4)
|
|
||||||
{
|
{
|
||||||
corr += mixingPos[i] * compare[i] +
|
corr += mixingPos[i] * compare[i];
|
||||||
mixingPos[i + 1] * compare[i + 1];
|
norm += mixingPos[i] * mixingPos[i];
|
||||||
|
|
||||||
norm += mixingPos[i] * mixingPos[i] +
|
|
||||||
mixingPos[i + 1] * mixingPos[i + 1];
|
|
||||||
|
|
||||||
// unroll the loop for better CPU efficiency:
|
|
||||||
corr += mixingPos[i + 2] * compare[i + 2] +
|
|
||||||
mixingPos[i + 3] * compare[i + 3];
|
|
||||||
|
|
||||||
norm += mixingPos[i + 2] * mixingPos[i + 2] +
|
|
||||||
mixingPos[i + 3] * mixingPos[i + 3];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
anorm = norm;
|
anorm = norm;
|
||||||
|
@ -1076,7 +1072,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
|
||||||
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
||||||
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
|
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
|
||||||
{
|
{
|
||||||
double corr;
|
float corr;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
corr = 0;
|
corr = 0;
|
||||||
|
@ -1087,14 +1083,13 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c
|
||||||
norm -= mixingPos[-i] * mixingPos[-i];
|
norm -= mixingPos[-i] * mixingPos[-i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
// hint compiler autovectorization that loop length is divisible by 8
|
||||||
// For mono it's same routine yet unrollsd by factor of 4.
|
int ilength = (channels * overlapLength) & -8;
|
||||||
for (i = 0; i < channels * overlapLength; i += 4)
|
|
||||||
|
// Same routine for stereo and mono
|
||||||
|
for (i = 0; i < ilength; i ++)
|
||||||
{
|
{
|
||||||
corr += mixingPos[i] * compare[i] +
|
corr += mixingPos[i] * compare[i];
|
||||||
mixingPos[i + 1] * compare[i + 1] +
|
|
||||||
mixingPos[i + 2] * compare[i + 2] +
|
|
||||||
mixingPos[i + 3] * compare[i + 3];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// update normalizer with last samples of this round
|
// update normalizer with last samples of this round
|
||||||
|
|
|
@ -80,7 +80,7 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &a
|
||||||
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
|
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
|
||||||
// for choosing if this little cheating is allowed.
|
// for choosing if this little cheating is allowed.
|
||||||
|
|
||||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||||
// Little cheating allowed, return valid correlation only for
|
// Little cheating allowed, return valid correlation only for
|
||||||
// aligned locations, meaning every second round for stereo sound.
|
// aligned locations, meaning every second round for stereo sound.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue