mirror of https://github.com/PCSX2/pcsx2.git
3rdparty: Upgrade soundtouch lib to 2.3.1
This commit is contained in:
parent
791f2a63ac
commit
e37afd6976
|
@ -2,7 +2,7 @@
|
|||
Version 2.1, February 1999
|
||||
|
||||
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
|
@ -117,7 +117,7 @@ be combined with the library in order to run.
|
|||
|
||||
0. This License Agreement applies to any software library or other
|
||||
program which contains a notice placed by the copyright holder or
|
||||
other authoried party saying it may be distributed under the terms of
|
||||
other authorized party saying it may be distributed under the terms of
|
||||
this Lesser General Public License (also called "this License").
|
||||
Each licensee is addressed as "you".
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -170,6 +170,9 @@ public:
|
|||
/// allow trimming (downwards) amount of samples in pipeline.
|
||||
/// Returns adjusted amount of samples
|
||||
uint adjustAmountOfSamples(uint numSamples);
|
||||
|
||||
/// Add silence to end of buffer
|
||||
void addSilent(uint nSamples);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -121,10 +121,10 @@ namespace soundtouch
|
|||
|
||||
#endif
|
||||
|
||||
// If defined, allows the SIMD-optimized routines to take minor shortcuts
|
||||
// for improved performance. Undefine to require faithfully similar SIMD
|
||||
// calculations as in normal C implementation.
|
||||
#define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION 1
|
||||
// If defined, allows the SIMD-optimized routines to skip unevenly aligned
|
||||
// memory offsets that can cause performance penalty in some SIMD implementations.
|
||||
// Causes slight compromise in sound quality.
|
||||
// #define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION 1
|
||||
|
||||
|
||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||
|
@ -149,8 +149,9 @@ namespace soundtouch
|
|||
|
||||
// floating point samples
|
||||
typedef float SAMPLETYPE;
|
||||
// data type for sample accumulation: Use double to utilize full precision.
|
||||
typedef double LONG_SAMPLETYPE;
|
||||
// data type for sample accumulation: Use float also here to enable
|
||||
// efficient autovectorization
|
||||
typedef float LONG_SAMPLETYPE;
|
||||
|
||||
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
||||
// Allow SSE optimizations
|
||||
|
@ -159,7 +160,13 @@ namespace soundtouch
|
|||
|
||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||
|
||||
};
|
||||
#if ((SOUNDTOUCH_ALLOW_SSE) || (__SSE__) || (SOUNDTOUCH_USE_NEON))
|
||||
#if SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
||||
#define ST_SIMD_AVOID_UNALIGNED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions:
|
||||
// #define ST_NO_EXCEPTION_HANDLING 1
|
||||
|
|
|
@ -72,10 +72,10 @@ namespace soundtouch
|
|||
{
|
||||
|
||||
/// Soundtouch library version string
|
||||
#define SOUNDTOUCH_VERSION "2.1.2"
|
||||
#define SOUNDTOUCH_VERSION "2.3.1"
|
||||
|
||||
/// SoundTouch library version id
|
||||
#define SOUNDTOUCH_VERSION_ID (20102)
|
||||
#define SOUNDTOUCH_VERSION_ID (20301)
|
||||
|
||||
//
|
||||
// Available setting IDs for the 'setSetting' & 'getSetting' functions:
|
||||
|
|
|
@ -313,7 +313,7 @@ void BPMDetect::updateXCorr(int process_samples)
|
|||
#pragma omp parallel for
|
||||
for (offs = windowStart; offs < windowLen; offs ++)
|
||||
{
|
||||
double sum;
|
||||
float sum;
|
||||
int i;
|
||||
|
||||
sum = 0;
|
||||
|
@ -341,7 +341,6 @@ void BPMDetect::updateBeatPos(int process_samples)
|
|||
// static double thr = 0.0003;
|
||||
double posScale = (double)this->decimateBy / (double)this->sampleRate;
|
||||
int resetDur = (int)(0.12 / posScale + 0.5);
|
||||
double corrScale = 1.0 / (double)(windowLen - windowStart);
|
||||
|
||||
// prescale pbuffer
|
||||
float tmp[XCORR_UPDATE_SEQUENCE / 2];
|
||||
|
@ -353,7 +352,7 @@ void BPMDetect::updateBeatPos(int process_samples)
|
|||
#pragma omp parallel for
|
||||
for (int offs = windowStart; offs < windowLen; offs++)
|
||||
{
|
||||
double sum = 0;
|
||||
float sum = 0;
|
||||
for (int i = 0; i < process_samples; i++)
|
||||
{
|
||||
sum += tmp[i] * pBuffer[offs + i];
|
||||
|
@ -562,7 +561,7 @@ float BPMDetect::getBpm()
|
|||
/// \return number of beats in the arrays.
|
||||
int BPMDetect::getBeats(float *pos, float *values, int max_num)
|
||||
{
|
||||
int num = beats.size();
|
||||
int num = (int)beats.size();
|
||||
if ((!pos) || (!values)) return num; // pos or values NULL, return just size
|
||||
|
||||
for (int i = 0; (i < num) && (i < max_num); i++)
|
||||
|
|
|
@ -265,3 +265,11 @@ uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples)
|
|||
}
|
||||
return samplesInBuffer;
|
||||
}
|
||||
|
||||
|
||||
/// Add silence to end of buffer
|
||||
void FIFOSampleBuffer::addSilent(uint nSamples)
|
||||
{
|
||||
memset(ptrEnd(nSamples), 0, sizeof(SAMPLETYPE) * nSamples * channels);
|
||||
samplesInBuffer += nSamples;
|
||||
}
|
||||
|
|
|
@ -60,12 +60,14 @@ FIRFilter::FIRFilter()
|
|||
length = 0;
|
||||
lengthDiv8 = 0;
|
||||
filterCoeffs = NULL;
|
||||
filterCoeffsStereo = NULL;
|
||||
}
|
||||
|
||||
|
||||
FIRFilter::~FIRFilter()
|
||||
{
|
||||
delete[] filterCoeffs;
|
||||
delete[] filterCoeffsStereo;
|
||||
}
|
||||
|
||||
|
||||
|
@ -78,35 +80,26 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
|||
// because division is much slower operation than multiplying.
|
||||
double dScaler = 1.0 / (double)resultDivider;
|
||||
#endif
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = length & -8;
|
||||
|
||||
assert(length != 0);
|
||||
assert(src != NULL);
|
||||
assert(dest != NULL);
|
||||
assert(filterCoeffs != NULL);
|
||||
assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL));
|
||||
|
||||
end = 2 * (numSamples - length);
|
||||
end = 2 * (numSamples - ilength);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (j = 0; j < end; j += 2)
|
||||
{
|
||||
const SAMPLETYPE *ptr;
|
||||
LONG_SAMPLETYPE suml, sumr;
|
||||
uint i;
|
||||
|
||||
suml = sumr = 0;
|
||||
ptr = src + j;
|
||||
|
||||
for (i = 0; i < length; i += 4)
|
||||
for (int i = 0; i < ilength; i ++)
|
||||
{
|
||||
// loop is unrolled by factor of 4 here for efficiency
|
||||
suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
|
||||
ptr[2 * i + 2] * filterCoeffs[i + 1] +
|
||||
ptr[2 * i + 4] * filterCoeffs[i + 2] +
|
||||
ptr[2 * i + 6] * filterCoeffs[i + 3];
|
||||
sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
|
||||
ptr[2 * i + 3] * filterCoeffs[i + 1] +
|
||||
ptr[2 * i + 5] * filterCoeffs[i + 2] +
|
||||
ptr[2 * i + 7] * filterCoeffs[i + 3];
|
||||
suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
|
||||
sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
|
||||
}
|
||||
|
||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||
|
@ -116,14 +109,11 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
|
|||
suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
|
||||
// saturate to 16 bit integer limits
|
||||
sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
|
||||
#else
|
||||
suml *= dScaler;
|
||||
sumr *= dScaler;
|
||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||
dest[j] = (SAMPLETYPE)suml;
|
||||
dest[j + 1] = (SAMPLETYPE)sumr;
|
||||
}
|
||||
return numSamples - length;
|
||||
return numSamples - ilength;
|
||||
}
|
||||
|
||||
|
||||
|
@ -137,31 +127,28 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
|
|||
double dScaler = 1.0 / (double)resultDivider;
|
||||
#endif
|
||||
|
||||
assert(length != 0);
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = length & -8;
|
||||
|
||||
end = numSamples - length;
|
||||
assert(ilength != 0);
|
||||
|
||||
end = numSamples - ilength;
|
||||
#pragma omp parallel for
|
||||
for (j = 0; j < end; j ++)
|
||||
for (j = 0; j < end; j ++)
|
||||
{
|
||||
const SAMPLETYPE *pSrc = src + j;
|
||||
LONG_SAMPLETYPE sum;
|
||||
uint i;
|
||||
int i;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < length; i += 4)
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
// loop is unrolled by factor of 4 here for efficiency
|
||||
sum += pSrc[i + 0] * filterCoeffs[i + 0] +
|
||||
pSrc[i + 1] * filterCoeffs[i + 1] +
|
||||
pSrc[i + 2] * filterCoeffs[i + 2] +
|
||||
pSrc[i + 3] * filterCoeffs[i + 3];
|
||||
sum += pSrc[i] * filterCoeffs[i];
|
||||
}
|
||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||
sum >>= resultDivFactor;
|
||||
// saturate to 16 bit integer limits
|
||||
sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
|
||||
#else
|
||||
sum *= dScaler;
|
||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||
dest[j] = (SAMPLETYPE)sum;
|
||||
}
|
||||
|
@ -185,14 +172,18 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||
assert(filterCoeffs != NULL);
|
||||
assert(numChannels < 16);
|
||||
|
||||
end = numChannels * (numSamples - length);
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = length & -8;
|
||||
|
||||
end = numChannels * (numSamples - ilength);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (j = 0; j < end; j += numChannels)
|
||||
{
|
||||
const SAMPLETYPE *ptr;
|
||||
LONG_SAMPLETYPE sums[16];
|
||||
uint c, i;
|
||||
uint c;
|
||||
int i;
|
||||
|
||||
for (c = 0; c < numChannels; c ++)
|
||||
{
|
||||
|
@ -201,7 +192,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||
|
||||
ptr = src + j;
|
||||
|
||||
for (i = 0; i < length; i ++)
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
SAMPLETYPE coef=filterCoeffs[i];
|
||||
for (c = 0; c < numChannels; c ++)
|
||||
|
@ -215,13 +206,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
|
|||
{
|
||||
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
||||
sums[c] >>= resultDivFactor;
|
||||
#else
|
||||
sums[c] *= dScaler;
|
||||
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
||||
dest[j+c] = (SAMPLETYPE)sums[c];
|
||||
}
|
||||
}
|
||||
return numSamples - length;
|
||||
return numSamples - ilength;
|
||||
}
|
||||
|
||||
|
||||
|
@ -233,6 +222,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
|||
assert(newLength > 0);
|
||||
if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
|
||||
|
||||
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
||||
// scale coefficients already here if using floating samples
|
||||
double scale = 1.0 / resultDivider;
|
||||
#else
|
||||
short scale = 1;
|
||||
#endif
|
||||
|
||||
lengthDiv8 = newLength / 8;
|
||||
length = lengthDiv8 * 8;
|
||||
assert(length == newLength);
|
||||
|
@ -242,7 +238,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
|
|||
|
||||
delete[] filterCoeffs;
|
||||
filterCoeffs = new SAMPLETYPE[length];
|
||||
memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
|
||||
delete[] filterCoeffsStereo;
|
||||
filterCoeffsStereo = new SAMPLETYPE[length*2];
|
||||
for (uint i = 0; i < length; i ++)
|
||||
{
|
||||
filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||
// create also stereo set of filter coefficients: this allows compiler
|
||||
// to autovectorize filter evaluation much more efficiently
|
||||
filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||
filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ protected:
|
|||
|
||||
// Memory for filter coefficients
|
||||
SAMPLETYPE *filterCoeffs;
|
||||
SAMPLETYPE *filterCoeffsStereo;
|
||||
|
||||
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
|
|
|
@ -41,7 +41,6 @@ namespace soundtouch
|
|||
class InterpolateCubic : public TransposerBase
|
||||
{
|
||||
protected:
|
||||
virtual void resetRegisters();
|
||||
virtual int transposeMono(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
int &srcSamples);
|
||||
|
@ -56,6 +55,13 @@ protected:
|
|||
|
||||
public:
|
||||
InterpolateCubic();
|
||||
|
||||
virtual void resetRegisters();
|
||||
|
||||
int getLatency() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -142,7 +142,7 @@ int InterpolateLinearInteger::transposeMulti(SAMPLETYPE *dest, const SAMPLETYPE
|
|||
LONG_SAMPLETYPE temp, vol1;
|
||||
|
||||
assert(iFract < SCALE);
|
||||
vol1 = (SCALE - iFract);
|
||||
vol1 = (LONG_SAMPLETYPE)(SCALE - iFract);
|
||||
for (int c = 0; c < numChannels; c ++)
|
||||
{
|
||||
temp = vol1 * src[c] + iFract * src[c + numChannels];
|
||||
|
|
|
@ -45,8 +45,6 @@ protected:
|
|||
int iFract;
|
||||
int iRate;
|
||||
|
||||
virtual void resetRegisters();
|
||||
|
||||
virtual int transposeMono(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
int &srcSamples);
|
||||
|
@ -60,6 +58,13 @@ public:
|
|||
/// Sets new target rate. Normal rate = 1.0, smaller values represent slower
|
||||
/// rate, larger faster rates.
|
||||
virtual void setRate(double newRate);
|
||||
|
||||
virtual void resetRegisters();
|
||||
|
||||
int getLatency() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
@ -69,8 +74,6 @@ class InterpolateLinearFloat : public TransposerBase
|
|||
protected:
|
||||
double fract;
|
||||
|
||||
virtual void resetRegisters();
|
||||
|
||||
virtual int transposeMono(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
int &srcSamples);
|
||||
|
@ -81,6 +84,13 @@ protected:
|
|||
|
||||
public:
|
||||
InterpolateLinearFloat();
|
||||
|
||||
virtual void resetRegisters();
|
||||
|
||||
int getLatency() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -46,7 +46,6 @@ namespace soundtouch
|
|||
class InterpolateShannon : public TransposerBase
|
||||
{
|
||||
protected:
|
||||
void resetRegisters();
|
||||
int transposeMono(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
int &srcSamples);
|
||||
|
@ -61,6 +60,13 @@ protected:
|
|||
|
||||
public:
|
||||
InterpolateShannon();
|
||||
|
||||
void resetRegisters();
|
||||
|
||||
int getLatency() const
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ int PeakFinder::findTop(const float *data, int peakpos) const
|
|||
|
||||
refvalue = data[peakpos];
|
||||
|
||||
// seek within <EFBFBD>10 points
|
||||
// seek within ±10 points
|
||||
start = peakpos - 10;
|
||||
if (start < minPos) start = minPos;
|
||||
end = peakpos + 10;
|
||||
|
@ -142,7 +142,7 @@ int PeakFinder::findCrossingLevel(const float *data, float level, int peakpos, i
|
|||
peaklevel = data[peakpos];
|
||||
assert(peaklevel >= level);
|
||||
pos = peakpos;
|
||||
while ((pos >= minPos) && (pos < maxPos))
|
||||
while ((pos >= minPos) && (pos + direction < maxPos))
|
||||
{
|
||||
if (data[pos + direction] < level) return pos; // crossing found
|
||||
pos += direction;
|
||||
|
@ -256,7 +256,7 @@ double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
|
|||
|
||||
// accept harmonic peak if
|
||||
// (a) it is found
|
||||
// (b) is within <EFBFBD>4% of the expected harmonic interval
|
||||
// (b) is within ±4% of the expected harmonic interval
|
||||
// (c) has at least half x-corr value of the max. peak
|
||||
|
||||
double diff = harmonic * peaktmp / highPeak;
|
||||
|
|
|
@ -61,6 +61,7 @@ RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
|
|||
// Instantiates the anti-alias filter
|
||||
pAAFilter = new AAFilter(64);
|
||||
pTransposer = TransposerBase::newInstance();
|
||||
clear();
|
||||
}
|
||||
|
||||
|
||||
|
@ -77,6 +78,7 @@ void RateTransposer::enableAAFilter(bool newMode)
|
|||
#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
|
||||
// Disable Anti-alias filter if desirable to avoid click at rate change zero value crossover
|
||||
bUseAAFilter = newMode;
|
||||
clear();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -192,6 +194,11 @@ void RateTransposer::clear()
|
|||
outputBuffer.clear();
|
||||
midBuffer.clear();
|
||||
inputBuffer.clear();
|
||||
pTransposer->resetRegisters();
|
||||
|
||||
// prefill buffer to avoid losing first samples at beginning of stream
|
||||
int prefill = getLatency();
|
||||
inputBuffer.addSilent(prefill);
|
||||
}
|
||||
|
||||
|
||||
|
@ -209,7 +216,8 @@ int RateTransposer::isEmpty() const
|
|||
/// Return approximate initial input-output latency
|
||||
int RateTransposer::getLatency() const
|
||||
{
|
||||
return (bUseAAFilter) ? pAAFilter->getLength() : 0;
|
||||
return pTransposer->getLatency() +
|
||||
((bUseAAFilter) ? (pAAFilter->getLength() / 2) : 0);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -59,8 +59,6 @@ public:
|
|||
};
|
||||
|
||||
protected:
|
||||
virtual void resetRegisters() = 0;
|
||||
|
||||
virtual int transposeMono(SAMPLETYPE *dest,
|
||||
const SAMPLETYPE *src,
|
||||
int &srcSamples) = 0;
|
||||
|
@ -83,6 +81,9 @@ public:
|
|||
virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src);
|
||||
virtual void setRate(double newRate);
|
||||
virtual void setChannels(int channels);
|
||||
virtual int getLatency() const = 0;
|
||||
|
||||
virtual void resetRegisters() = 0;
|
||||
|
||||
// static factory function
|
||||
static TransposerBase *newInstance();
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
|
||||
/// while maintaining the original pitch by using a time domain WSOLA-like
|
||||
|
@ -54,7 +54,6 @@ using namespace soundtouch;
|
|||
|
||||
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* Constant definitions
|
||||
|
@ -93,11 +92,6 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
|
|||
bAutoSeqSetting = true;
|
||||
bAutoSeekSetting = true;
|
||||
|
||||
maxnorm = 0;
|
||||
maxnormf = 1e8;
|
||||
|
||||
skipFract = 0;
|
||||
|
||||
tempo = 1.0f;
|
||||
setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
|
||||
setTempo(1.0f);
|
||||
|
@ -203,7 +197,7 @@ void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
|||
m1 = (SAMPLETYPE)0;
|
||||
m2 = (SAMPLETYPE)overlapLength;
|
||||
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
{
|
||||
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
||||
m1 += 1;
|
||||
|
@ -224,6 +218,9 @@ void TDStretch::clearInput()
|
|||
inputBuffer.clear();
|
||||
clearMidBuffer();
|
||||
isBeginning = true;
|
||||
maxnorm = 0;
|
||||
maxnormf = 1e8;
|
||||
skipFract = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -311,13 +308,14 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
|||
bestCorr = (bestCorr + 0.1) * 0.75;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (i = 1; i < seekLength; i ++)
|
||||
for (i = 1; i < seekLength; i ++)
|
||||
{
|
||||
double corr;
|
||||
// Calculates correlation value for the mixing position corresponding to 'i'
|
||||
#ifdef _OPENMP
|
||||
#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
|
||||
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
|
||||
// iterate the loop in sequential order
|
||||
// in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
|
||||
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
|
||||
#else
|
||||
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
|
||||
|
@ -675,23 +673,24 @@ void TDStretch::processSamples()
|
|||
// Adjust processing offset at beginning of track by not performing initial overlapping
|
||||
// and compensating that in the 'input buffer skip' calculation
|
||||
isBeginning = false;
|
||||
int skip = (int)(tempo * overlapLength + 0.5);
|
||||
int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
|
||||
|
||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
||||
#ifdef SOUNDTOUCH_ALLOW_SSE
|
||||
// if SSE mode, round the skip amount to value corresponding to aligned memory address
|
||||
if (channels == 1)
|
||||
{
|
||||
skip &= -4;
|
||||
}
|
||||
else if (channels == 2)
|
||||
{
|
||||
skip &= -2;
|
||||
}
|
||||
#endif
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode, round the skip amount to value corresponding to aligned memory address
|
||||
if (channels == 1)
|
||||
{
|
||||
skip &= -4;
|
||||
}
|
||||
else if (channels == 2)
|
||||
{
|
||||
skip &= -2;
|
||||
}
|
||||
#endif
|
||||
skipFract -= skip;
|
||||
assert(nominalSkip >= -skipFract);
|
||||
if (skipFract <= -nominalSkip)
|
||||
{
|
||||
skipFract = -nominalSkip;
|
||||
}
|
||||
}
|
||||
|
||||
// ... then copy sequence samples from 'inputBuffer' to output:
|
||||
|
@ -818,7 +817,7 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
|||
short temp;
|
||||
int cnt2;
|
||||
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
{
|
||||
temp = (short)(overlapLength - i);
|
||||
cnt2 = 2 * i;
|
||||
|
@ -830,21 +829,19 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
|||
|
||||
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
|
||||
// version of the routine.
|
||||
void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
|
||||
void TDStretch::overlapMulti(short *poutput, const short *input) const
|
||||
{
|
||||
SAMPLETYPE m1=(SAMPLETYPE)0;
|
||||
SAMPLETYPE m2;
|
||||
int i=0;
|
||||
short m1;
|
||||
int i = 0;
|
||||
|
||||
for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
|
||||
for (m1 = 0; m1 < overlapLength; m1 ++)
|
||||
{
|
||||
short m2 = (short)(overlapLength - m1);
|
||||
for (int c = 0; c < channels; c ++)
|
||||
{
|
||||
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
|
||||
i++;
|
||||
}
|
||||
|
||||
m1++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -889,20 +886,23 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
|||
unsigned long lnorm;
|
||||
int i;
|
||||
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||
#endif
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
corr = lnorm = 0;
|
||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
||||
// efficiency and gives slightly better resolution against rounding.
|
||||
// For mono it same routine, just unrolls loop by factor of 4
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i += 2)
|
||||
{
|
||||
corr += (mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||
lnorm += (mixingPos[i] * mixingPos[i] +
|
||||
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
|
||||
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
|
||||
// do intermediate scalings to avoid integer overflow
|
||||
}
|
||||
|
||||
if (lnorm > maxnorm)
|
||||
|
@ -925,9 +925,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
|||
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
|
||||
{
|
||||
long corr;
|
||||
unsigned long lnorm;
|
||||
long lnorm;
|
||||
int i;
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
// cancel first normalizer tap from previous round
|
||||
lnorm = 0;
|
||||
for (i = 1; i <= channels; i ++)
|
||||
|
@ -936,15 +939,11 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
|||
}
|
||||
|
||||
corr = 0;
|
||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
||||
// efficiency and gives slightly better resolution against rounding.
|
||||
// For mono it same routine, just unrolls loop by factor of 4
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono.
|
||||
for (i = 0; i < ilength; i += 2)
|
||||
{
|
||||
corr += (mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||
}
|
||||
|
||||
// update normalizer with last samples of this round
|
||||
|
@ -1045,27 +1044,24 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
|
|||
/// Calculate cross-correlation
|
||||
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
|
||||
{
|
||||
double corr;
|
||||
double norm;
|
||||
float corr;
|
||||
float norm;
|
||||
int i;
|
||||
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||
#endif
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
corr = norm = 0;
|
||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
||||
// For mono it's same routine yet unrollsd by factor of 4.
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
corr += mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1];
|
||||
|
||||
norm += mixingPos[i] * mixingPos[i] +
|
||||
mixingPos[i + 1] * mixingPos[i + 1];
|
||||
|
||||
// unroll the loop for better CPU efficiency:
|
||||
corr += mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3];
|
||||
|
||||
norm += mixingPos[i + 2] * mixingPos[i + 2] +
|
||||
mixingPos[i + 3] * mixingPos[i + 3];
|
||||
corr += mixingPos[i] * compare[i];
|
||||
norm += mixingPos[i] * mixingPos[i];
|
||||
}
|
||||
|
||||
anorm = norm;
|
||||
|
@ -1076,7 +1072,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
|
|||
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
||||
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
|
||||
{
|
||||
double corr;
|
||||
float corr;
|
||||
int i;
|
||||
|
||||
corr = 0;
|
||||
|
@ -1087,14 +1083,13 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c
|
|||
norm -= mixingPos[-i] * mixingPos[-i];
|
||||
}
|
||||
|
||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
||||
// For mono it's same routine yet unrollsd by factor of 4.
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
corr += mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1] +
|
||||
mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3];
|
||||
corr += mixingPos[i] * compare[i];
|
||||
}
|
||||
|
||||
// update normalizer with last samples of this round
|
||||
|
|
|
@ -80,7 +80,7 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &a
|
|||
// Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
|
||||
// for choosing if this little cheating is allowed.
|
||||
|
||||
#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// Little cheating allowed, return valid correlation only for
|
||||
// aligned locations, meaning every second round for stereo sound.
|
||||
|
||||
|
|
Loading…
Reference in New Issue