//////////////////////////////////////////////////////////////////////////////// /// /// Beats-per-minute (BPM) detection routine. /// /// The beat detection algorithm works as follows: /// - Use function 'inputSamples' to input a chunks of samples to the class for /// analysis. It's a good idea to enter a large sound file or stream in smallish /// chunks of around few kilosamples in order not to extinguish too much RAM memory. /// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden, /// which is basically ok as low (bass) frequencies mostly determine the beat rate. /// Simple averaging is used for anti-alias filtering because the resulting signal /// quality isn't of that high importance. /// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by /// taking absolute value that's smoothed by sliding average. Signal levels that /// are below a couple of times the general RMS amplitude level are cut away to /// leave only notable peaks there. /// - Repeating sound patterns (e.g. beats) are detected by calculating short-term /// autocorrelation function of the enveloped signal. /// - After whole sound data file has been analyzed as above, the bpm level is /// detected by function 'getBpm' that finds the highest peak of the autocorrelation /// function, calculates it's precise location and converts this reading to bpm's. /// /// Author : Copyright (c) Olli Parviainen /// Author e-mail : oparviai 'at' iki.fi /// SoundTouch WWW: http://www.surina.net/soundtouch /// //////////////////////////////////////////////////////////////////////////////// // // Last changed : $Date: 2015-02-21 23:24:29 +0200 (Sat, 21 Feb 2015) $ // File revision : $Revision: 4 $ // // $Id: BPMDetect.cpp 202 2015-02-21 21:24:29Z oparviai $ // //////////////////////////////////////////////////////////////////////////////// // // License : // // SoundTouch audio processing library // Copyright (c) Olli Parviainen // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////////////////////// #include #include #include #include #include "FIFOSampleBuffer.h" #include "PeakFinder.h" #include "BPMDetect.h" using namespace soundtouch; #define INPUT_BLOCK_SAMPLES 2048 #define DECIMATED_BLOCK_SAMPLES 256 /// decay constant for calculating RMS volume sliding average approximation /// (time constant is about 10 sec) const float avgdecay = 0.99986f; /// Normalization coefficient for calculating RMS sliding average approximation. const float avgnorm = (1 - avgdecay); //////////////////////////////////////////////////////////////////////////////// // Enable following define to create bpm analysis file: // #define _CREATE_BPM_DEBUG_FILE #ifdef _CREATE_BPM_DEBUG_FILE #define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt" static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff) { FILE *fptr = fopen(DEBUGFILE_NAME, "wt"); int i; if (fptr) { printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n"); for (i = minpos; i < maxpos; i ++) { fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]); } fclose(fptr); } } #else #define _SaveDebugData(a,b,c,d) #endif //////////////////////////////////////////////////////////////////////////////// BPMDetect::BPMDetect(int numChannels, int aSampleRate) { this->sampleRate = aSampleRate; this->channels = numChannels; decimateSum = 0; decimateCount = 0; envelopeAccu = 0; // Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's // safe initial RMS signal level value for song data. This value is then adapted // to the actual level during processing. #ifdef SOUNDTOUCH_INTEGER_SAMPLES // integer samples RMSVolumeAccu = (1500 * 1500) / avgnorm; #else // float samples, scaled to range [-1..+1[ RMSVolumeAccu = (0.045f * 0.045f) / avgnorm; #endif // choose decimation factor so that result is approx. 1000 Hz decimateBy = sampleRate / 1000; assert(decimateBy > 0); assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES); // Calculate window length & starting item according to desired min & max bpms windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM); windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM); assert(windowLen > windowStart); // allocate new working objects xcorr = new float[windowLen]; memset(xcorr, 0, windowLen * sizeof(float)); // allocate processing buffer buffer = new FIFOSampleBuffer(); // we do processing in mono mode buffer->setChannels(1); buffer->clear(); } BPMDetect::~BPMDetect() { delete[] xcorr; delete buffer; } /// convert to mono, low-pass filter & decimate to about 500 Hz. /// return number of outputted samples. /// /// Decimation is used to remove the unnecessary frequencies and thus to reduce /// the amount of data needed to be processed as calculating autocorrelation /// function is a very-very heavy operation. /// /// Anti-alias filtering is done simply by averaging the samples. This is really a /// poor-man's anti-alias filtering, but it's not so critical in this kind of application /// (it'd also be difficult to design a high-quality filter with steep cut-off at very /// narrow band) int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples) { int count, outcount; LONG_SAMPLETYPE out; assert(channels > 0); assert(decimateBy > 0); outcount = 0; for (count = 0; count < numsamples; count ++) { int j; // convert to mono and accumulate for (j = 0; j < channels; j ++) { decimateSum += src[j]; } src += j; decimateCount ++; if (decimateCount >= decimateBy) { // Store every Nth sample only out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels)); decimateSum = 0; decimateCount = 0; #ifdef SOUNDTOUCH_INTEGER_SAMPLES // check ranges for sure (shouldn't actually be necessary) if (out > 32767) { out = 32767; } else if (out < -32768) { out = -32768; } #endif // SOUNDTOUCH_INTEGER_SAMPLES dest[outcount] = (SAMPLETYPE)out; outcount ++; } } return outcount; } // Calculates autocorrelation function of the sample history buffer void BPMDetect::updateXCorr(int process_samples) { int offs; SAMPLETYPE *pBuffer; assert(buffer->numSamples() >= (uint)(process_samples + windowLen)); pBuffer = buffer->ptrBegin(); #pragma omp parallel for for (offs = windowStart; offs < windowLen; offs ++) { LONG_SAMPLETYPE sum; int i; sum = 0; for (i = 0; i < process_samples; i ++) { sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary } // xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients // if it's desired that the system adapts automatically to // various bpms, e.g. in processing continouos music stream. // The 'xcorr_decay' should be a value that's smaller than but // close to one, and should also depend on 'process_samples' value. xcorr[offs] += (float)sum; } } // Calculates envelope of the sample data void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples) { const static double decay = 0.7f; // decay constant for smoothing the envelope const static double norm = (1 - decay); int i; LONG_SAMPLETYPE out; double val; for (i = 0; i < numsamples; i ++) { // calc average RMS volume RMSVolumeAccu *= avgdecay; val = (float)fabs((float)samples[i]); RMSVolumeAccu += val * val; // cut amplitudes that are below cutoff ~2 times RMS volume // (we're interested in peak values, not the silent moments) if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm)) { val = 0; } // smooth amplitude envelope envelopeAccu *= decay; envelopeAccu += val; out = (LONG_SAMPLETYPE)(envelopeAccu * norm); #ifdef SOUNDTOUCH_INTEGER_SAMPLES // cut peaks (shouldn't be necessary though) if (out > 32767) out = 32767; #endif // SOUNDTOUCH_INTEGER_SAMPLES samples[i] = (SAMPLETYPE)out; } } void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples) { SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES]; // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration while (numSamples > 0) { int block; int decSamples; block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples; // decimate. note that converts to mono at the same time decSamples = decimate(decimated, samples, block); samples += block * channels; numSamples -= block; // envelope new samples and add them to buffer calcEnvelope(decimated, decSamples); buffer->putSamples(decimated, decSamples); } // when the buffer has enought samples for processing... if ((int)buffer->numSamples() > windowLen) { int processLength; // how many samples are processed processLength = (int)buffer->numSamples() - windowLen; // ... calculate autocorrelations for oldest samples... updateXCorr(processLength); // ... and remove them from the buffer buffer->receiveSamples(processLength); } } void BPMDetect::removeBias() { int i; float minval = 1e12f; // arbitrary large number for (i = windowStart; i < windowLen; i ++) { if (xcorr[i] < minval) { minval = xcorr[i]; } } for (i = windowStart; i < windowLen; i ++) { xcorr[i] -= minval; } } float BPMDetect::getBpm() { double peakPos; double coeff; PeakFinder peakFinder; coeff = 60.0 * ((double)sampleRate / (double)decimateBy); // save bpm debug analysis data if debug data enabled _SaveDebugData(xcorr, windowStart, windowLen, coeff); // remove bias from xcorr data removeBias(); // find peak position peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen); assert(decimateBy != 0); if (peakPos < 1e-9) return 0.0; // detection failed. // calculate BPM return (float) (coeff / peakPos); }